library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("readxl")
library(tidyverse)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
otu <- read.table("ITS_OTUs_mod.txt", sep="\t", header=FALSE)
tax <- read.table("ITS_OTUs.UNITEv10_sh_99.wang.taxonomy", sep="\t", header=FALSE)
Chec do the otu names match
dif <- setdiff(tax$V1, otu$V1)
dif
## character(0)
Good, same OTUs in tax and OTU tables
Lets tweek the table row names and columns
# modify otu table
colnames(otu)=otu[c(1),]
# erase the first row, as now it is doubled
otu=otu[-c(1),]
# make first column into rownames
rownames(otu) <- otu$`OTU ID`
otu <- otu[, -c(1)]
# let's make a copy of tax table
tax.orig <- tax
# change column names
colnames(tax)[1] <- "OTU"
colnames(tax)[2] <- "taxa"
# and modify the tax, where in the str_remove_all . means any single character
tax <- tax %>%
mutate(taxa = str_remove_all(taxa, ".__")) %>%
separate(taxa,
into = c("kingdom", "phylum", "class", "order", "family", "genus", "species"),
sep = ";")
Everything went well but I got a warning message, because there are an extra ; at the end of the line so the last column is not made. which is correct. Let’s check if we now have NAs, just to check everything is ok.
sum(is.na(tax$OTU))
## [1] 0
sum(is.na(tax$kingdom))
## [1] 0
sum(is.na(tax$phylum))
## [1] 0
sum(is.na(tax$class))
## [1] 0
sum(is.na(tax$order))
## [1] 0
sum(is.na(tax$family))
## [1] 0
sum(is.na(tax$genus))
## [1] 0
sum(is.na(tax$species))
## [1] 0
Everything is ok since no column NA’s
Let’s remove the parentheses and numbers
tax$kingdom <- sub("\\(.*", "", tax$kingdom)
tax$phylum <- sub("\\(.*", "", tax$phylum)
tax$class <- sub("\\(.*", "", tax$class)
tax$order <- sub("\\(.*", "", tax$order)
tax$family <- sub("\\(.*", "", tax$family)
tax$genus <- sub("\\(.*", "", tax$genus)
tax$species <- sub("\\(.*", "", tax$species)
Check unique values of the higher taxons
paste("unique kingdoms: ", unique(tax$kingdom))
## [1] "unique kingdoms: Fungi"
paste("And the unique phyla are: ")
## [1] "And the unique phyla are: "
unique(tax$phylum)
## [1] "Ascomycota" "Basidiomycota" "Fungi_unclassified"
## [4] "Rozellomycota" "Basidiobolomycota" "Chytridiomycota"
## [7] "Mortierellomycota" "unclassified" "Glomeromycota"
## [10] "Mucoromycota" "Zoopagomycota" "Monoblepharomycota"
## [13] "Olpidiomycota" "Kickxellomycota" "Entorrhizomycota"
## [16] "Aphelidiomycota" "Neocallimastigomycota"
samples <- read.csv2('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile\\sample_data_updated_root_biomass.csv')
# change some column names
colnames(samples)[which(names(samples) == "production_type")] <- "sample_type"
The sample names in otu-table and sample-table do not match, because I have modified the after sequencing. However, I still have the original sample names as an ID column in sample-table so let’s use this to make the otu-table sample names to match.
x <- otu
x <- t(x)
x <- as.data.frame(x)
x <- rownames_to_column(x)
# pick the ID and sample names
y <- samples[, c(1,2)]
# add the sample names to x
x <- left_join(y, x, by = c("ID"="rowname"))
# remove ID
x <- x[, -c(1)]
# make sample names into rownames
rownames(x) <- x$sampleID
# remove the sampleID column
x <- x[, -c(1)]
x <- t(x)
otu <- x
rm(x)
rm(y)
# make into data frame
otu <- as.data.frame(otu)
# make sure numbers are numbers
otu <- otu %>% mutate_if(is.character, as.numeric)
# make OTUs into row names for tax-table and sample data
rownames(tax) <- tax$OTU
tax <- tax[, -c(1)]
rownames(samples) <- samples$sampleID
samples <- samples[, -c(1)]
# make all the elements, otu, tax and sample data, into phyloseq compatible format
samples <- sample_data(samples)
otu <- as.matrix(otu)
otu <- otu_table(otu, taxa_are_rows = TRUE)
tax <- tax_table(as.matrix(tax))
# finally combine all the elemnt to make a phyloseq object
ps <- phyloseq(otu, tax, samples)
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 70198 taxa and 142 samples ]
## sample_data() Sample Data: [ 142 samples by 21 sample variables ]
## tax_table() Taxonomy Table: [ 70198 taxa by 7 taxonomic ranks ]
Note: Unclassified phyla are called either:
“Fungi_unclassified” or “unclassified”
# let's first remove the two controls, which by the way had zero OTUs
ps <- subset_samples(ps, sampleID != "0ctrl-1")
ps <- subset_samples(ps, sampleID != "0ctrl-2")
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 70198 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 21 sample variables ]
## tax_table() Taxonomy Table: [ 70198 taxa by 7 taxonomic ranks ]
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps),
MARGIN = ifelse(taxa_are_rows(ps), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps = prune_taxa((prev0 > 0), ps)
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 70198 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 21 sample variables ]
## tax_table() Taxonomy Table: [ 70198 taxa by 7 taxonomic ranks ]
rm(prev0)
rm(tax.orig)
#I will remove two: "Fungi_unclassified" and "unclassified"
ps <- phyloseq::subset_taxa(ps, phylum != "Fungi_unclassified" & phylum != "unclassified")
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 68186 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 21 sample variables ]
## tax_table() Taxonomy Table: [ 68186 taxa by 7 taxonomic ranks ]
ps_sng <- ps
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
save(ps_sng, file='ps_phyloseq_with_sng')
ps <- filter_taxa(ps, function (x) {sum(x > 0) > 1}, prune=TRUE)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps, file='ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 21 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
Without singletons, there are 20610 OTUs
Let’s get some other data from the ps
summarize_phyloseq(ps)
## [[1]]
## [1] "1] Min. number of reads = 44"
##
## [[2]]
## [1] "2] Max. number of reads = 410851"
##
## [[3]]
## [1] "3] Total number of reads = 11539503"
##
## [[4]]
## [1] "4] Average number of reads = 82425.0214285714"
##
## [[5]]
## [1] "5] Median number of reads = 79653.5"
##
## [[6]]
## [1] "7] Sparsity = 0.928178415470992"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
##
## [[8]]
## [1] "8] Number of singletons = 0"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 21"
##
## [[11]]
## [1] "sampleID" "plot" "sampling_position"
## [4] "depth" "depth_numerical" "vegetation"
## [7] "sample_type" "root_mgg" "pH_H2O"
## [10] "EC_uScm" "C_g_per_kg" "N_gkg"
## [13] "TP_gkg" "Alox_mmolkg" "Feox_mmolkg"
## [16] "oxides_mmolkg" "PH2O_mgkg" "Porg_mgkg"
## [19] "DOC_mgkg" "Pinorg_mgkg" "C_per_N"
readcount(ps) %>% sort(decreasing = FALSE) %>% head()
## CG9.1_40to70 NG2B3_30to40 NG2B2_40to70 CPO5.2_40to70 CG9.3_40to70
## 44 526 811 7818 8588
## OG10.1_40to80
## 9882
There is only one sample with low read count (it is conventional treatment deep sample) whereas the others have quite ok (more than 500). However, I tested (not shown here) that keeping the low read count sample in the data does not change any of the analysis so it may still represent the fungal community(?)
I noticed that there is a mistake in the taxonomy, so that sometimes I have “unclassified” annotation at lets say genus level, although I have annotation at e.g. family level
In these cases I rather have at the genus level the annotation “family_unclassified”
tax <- as.data.frame(tax_table(ps))
I checked that there is no unclassified at phylum level
So let’s change first at class level.
tax$class <- ifelse(tax$class == "unclassified", paste(tax$phylum, sep = "_", "unclassified"), as.character(tax$class))
And do same for all rest of the ranks, but I have to replace the “_unclassified_unclassified” with “_unclassified” afterwards
I know, there must be a more elegant way to do this:)
Change for order
tax$order <- ifelse(tax$order == "unclassified", paste(tax$class, sep = "_", "unclassified"), as.character(tax$order))
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)
Change for family
tax$family <- ifelse(tax$family == "unclassified", paste(tax$order, sep = "_", "unclassified"), as.character(tax$family))
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)
Change for genus
tax$genus <- ifelse(tax$genus == "unclassified", paste(tax$family, sep = "_", "unclassified"), as.character(tax$genus))
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)
Change for species
tax$species <- ifelse(tax$species == "unclassified", paste(tax$genus, sep = "_", "unclassified"), as.character(tax$species))
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)
Save the new modified ps as the new “final version”
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
tax_table(ps) <- tax_table(as.matrix(tax))
save(ps, file='ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 21 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 21 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)
# change natural_grass to meadow
meta <- data.frame(lapply(meta, function(x) {gsub("Natural_grass", "meadow", x)}))
rownames(meta) <- meta$sampleID
# Column indices to be converted numeric
i <- c(5, 8:21)
meta[, i] <- apply(meta[, i], 2, function(x) as.numeric(as.character(x)))
# lets chance the depth to soil layer mid-depth
meta$depth_numerical[meta$depth_numerical == 0] <- 5
meta$depth_numerical[meta$depth_numerical == 10] <- 15
meta$depth_numerical[meta$depth_numerical == 20] <- 25
meta$depth_numerical[meta$depth_numerical == 30] <- 35
meta$depth_numerical[meta$depth_numerical == 40] <- 60
sample_data(ps) <- sample_data(meta)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
save(ps, file='ps_FINAL')
# calculate all diversity measures
div <- microbiome::alpha(ps, index = "all")
# Assign the estimated diversity and richness to sample metadata
sample_data(ps)$observed <- div$observed
sample_data(ps)$chao1 <- div$chao1
sample_data(ps)$shannon <- div$diversity_shannon
meta <- meta(ps)
meta$sample_type <- as.factor(meta$sample_type)
meta$sample_type <- factor(meta$sample_type, levels = c("forest", "meadow", "organic", "conventional"))
sample_data(ps) <- sample_data(meta)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
save(ps, file='ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 24 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
I will use the one without singletons, but just for comparison, I also calculate with singletons
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_phyloseq_with_sng')
ps_sng
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 68186 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 21 sample variables ]
## tax_table() Taxonomy Table: [ 68186 taxa by 7 taxonomic ranks ]
div <- microbiome::alpha(ps_sng, index = "all")
# Assign the estimated diversity to sample metadata
sample_data(ps)$observed_sng <- div$observed
sample_data(ps)$chao1_sng <- div$chao1
sample_data(ps)$shannon_sng <- div$diversity_shannon
meta <- meta(ps)
save(ps, file='ps_FINAL')
# create your own color palette for sample types
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
# OTU richness
OTU_rich <- meta %>%
dplyr::group_by(sample_type, depth_numerical) %>%
dplyr::summarise(mean = mean(observed, na.rm = TRUE), se = (sd(observed, na.rm = TRUE)/sqrt(length((observed))))) %>%
dplyr::ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=18),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="OTU richness") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
OTU_rich
# shannon
shannon <- meta %>%
dplyr::group_by(sample_type, depth_numerical) %>%
dplyr::summarise(mean = mean(shannon, na.rm = TRUE), se = (sd(shannon, na.rm = TRUE)/sqrt(length((shannon))))) %>%
dplyr::ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=18),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Shannon") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
shannon
# OTU richness
OTU_rich_sng <- meta %>%
dplyr::group_by(sample_type, depth_numerical) %>%
dplyr::summarise(mean = mean(observed_sng, na.rm = TRUE), se = (sd(observed_sng, na.rm = TRUE)/sqrt(length((observed_sng))))) %>%
dplyr::ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=18),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="OTU richness") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
OTU_rich_sng
Keeping the singletons does not change the comparison of soil layers and treatments
# shannon
shannon_sng <- meta %>%
dplyr::group_by(sample_type, depth_numerical) %>%
dplyr::summarise(mean = mean(shannon_sng, na.rm = TRUE), se = (sd(shannon_sng, na.rm = TRUE)/sqrt(length((shannon_sng))))) %>%
dplyr::ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=18),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Shannon") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
shannon
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("RColorBrewer") # nice color options
library(multcompView)
library(rcompanion)
library(car)
library(multcomp)
library(stringr)
library(ggrepel)
library(MicEco)
library(metagMisc)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
This is how one would acquire the funguild database, but I will use the one I previously acquired
#FG <- parse_funguild()
#attr(FG, "DownloadDate") # Check when the database was downloaded
#setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
#save FG
#write.csv2(FG, file = "FUNGuild_31_05_2024.csv", row.names = FALSE)
I have previously uploaded the FUNGuild database and will use that version here for continuity. I used a version downloaded: “Fri May 31 19:45:41 2024”
I need to annotate separately at different taxonomic level
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
FG <- read.csv2("FUNGuild_31_05_2024.csv")
# what different levels there are:
unique(FG$taxonomicLevel)
## [1] "Genus" "Species" "Variety" "Family" "Order"
## [6] "Phylum" "Form" "Subspecies"
I will annotate with Species, Genus, Family, Order, Phylum level
fg <- FG[FG$taxonomicLevel == "Species", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# fg_sp doesn't have underscores, so lets add them
fg$taxon <- sub(" ", "_", fg$taxon)
# merge tables
colnames(fg)[1] <- "species"
FG_tax_table <- merge(tax_table, fg, by = "species", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_sp"
colnames(FG_tax_table)[2] <- "guild_sp"
# save with new name
FUNGuild_sp <- FG_tax_table
fg <- FG[FG$taxonomicLevel == "Genus", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "genus"
FG_tax_table <- merge(tax_table, fg, by = "genus", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
# and OTU as row names
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_gen"
colnames(FG_tax_table)[2] <- "guild_gen"
# save with new name
FUNGuild_gen <- FG_tax_table
fg <- FG[FG$taxonomicLevel == "Family", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "family"
FG_tax_table <- merge(tax_table, fg, by = "family", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]
# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_fam"
colnames(FG_tax_table)[2] <- "guild_fam"
# save with new name
FUNGuild_fam <- FG_tax_table
fg <- FG[FG$taxonomicLevel == "Order", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "order"
FG_tax_table <- merge(tax_table, fg, by = "order", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]
# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_ord"
colnames(FG_tax_table)[2] <- "guild_ord"
# save with new name
FUNGuild_ord <- FG_tax_table
fg <- FG[FG$taxonomicLevel == "Phylum", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "phylum"
FG_tax_table <- merge(tax_table, fg, by = "phylum", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_phy"
colnames(FG_tax_table)[2] <- "guild_phy"
# save with new name
FUNGuild_phy <- FG_tax_table
species Genus Family Order Phylum
x <- left_join(rownames_to_column(FUNGuild_sp), rownames_to_column(FUNGuild_gen), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_fam), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_ord), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_phy), by = "rowname")
# get the value from another column if NA: now from genus
y <- x %>%
mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_gen))
# get the value from another column if NA: now from genus
y <- y %>%
mutate(guild_sp = coalesce(guild_sp,guild_gen))
# same for family
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_fam))
# get the value from another column if NA
y <- y %>%
mutate(guild_sp = coalesce(guild_sp,guild_fam))
# same for order
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_ord))
# get the value from another column if NA
y <- y %>%
mutate(guild_sp = coalesce(guild_sp,guild_ord))
# same for phylum
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_phy))
# get the value from another column if NA
y <- y %>%
mutate(guild_sp = coalesce(guild_sp,guild_phy))
# then rename the sp columns
colnames(y)[2] <- "trophicMode"
colnames(y)[3] <- "guild"
# remove the rest of the columns
y <- y[, -c(4:11)]
# rownames
y2 <- y[,-1]
rownames(y2) <- y[,1]
# how many of different trophic modes and NAs?
table(y2$trophicMode, useNA = "ifany")
##
## Pathotroph Pathotroph-Pathotroph-Saprotroph
## 20 7
## Pathotroph-Saprotroph-Symbiotroph Saprotroph
## 7 37
## Symbiotroph Pathotroph
## 148 1480
## Pathotroph-Saprotroph Pathotroph-Saprotroph-Symbiotroph
## 1973 977
## Pathotroph-Symbiotroph Saprotroph
## 98 4805
## Saprotroph-Symbiotroph Symbiotroph
## 1179 636
## <NA>
## 9243
9243/20610
## [1] 0.4484716
So approximately 44.8% are NAs
y3 <- left_join(rownames_to_column(y2), rownames_to_column(tax_table), by = "rowname")
row.names(y3) <- y3$rowname
y3 <- y3[, -1]
ps_FG <- phyloseq(otu_table(ps), tax_table(as.matrix(y3)), sample_data(ps))
ps_FG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 10 taxonomic ranks ]
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
save(ps_FG, file = 'ps_FG_with_NAs')
20610 taxa; contains all the NAs
and remove also “|”
FG_tax_table <- as.data.frame(tax_table(ps_FG))
FG_tax_table$trophicMode <- gsub(" ", "", FG_tax_table$trophicMode, fixed = TRUE)
FG_tax_table$guild <- gsub("|", "", FG_tax_table$guild, fixed = TRUE)
I will add a column “FUNGuild” where I curate some of the symbiotroph and pathotroph fungi according to my research interests. I am especially interested of AMF as they are important mycorrhiza in arable soils, but I also want to separate Ectomycorrhiza as they are important in forest. This leaves one more relatively big symbiotrophic guild, endophytes, which I will also specify. So the following curation will be done for FUNGuild column:
FG_tax_table <- FG_tax_table %>%
mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
FG_tax_table <- FG_tax_table %>%
mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" | FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" | FG != "Arbuscular Mycorrhizal" | FG != "Endophyte" | FG != "Plant Pathogen" ~ FG_tax_table$trophicMode))
# remove the FG column
FG_tax_table <- FG_tax_table[, c(1:10, 12)]
Check the different written forms, if I have empty spaces?
unique(FG_tax_table$trophicMode)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Symbiotroph" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Pathotroph"
## [7] "Saprotroph" "Pathotroph-Symbiotroph"
## [9] "Pathotroph-Pathotroph-Saprotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Arbuscular Mycorrhizal" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Pathotroph"
## [7] "Saprotroph" "Endophyte"
## [9] "Ectomycorrhizal" "Pathotroph-Symbiotroph"
## [11] "Plant Pathogen" "Symbiotroph"
## [13] "Pathotroph-Pathotroph-Saprotroph"
There is a “bug” in the FUNGuild data, so that in addition to Pathotroph-Saprotroph there is the “wrong” one Pathotroph-Pathotroph-Saprotroph etc. I will correct these.
Also I will name Symbiotroh into Other Symbiotroph as I have extracted the AMF, EcM etc. from the symbiotroph trophic mode
# change some names
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
# reorder
FG_tax_table <- FG_tax_table[, c(1:2, 11, 3:10)]
Check again
unique(FG_tax_table$trophicMode)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Symbiotroph" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Pathotroph"
## [7] "Saprotroph" "Pathotroph-Symbiotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Arbuscular Mycorrhizal" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Other Pathotroph"
## [7] "Saprotroph" "Endophyte"
## [9] "Ectomycorrhizal" "Pathotroph-Symbiotroph"
## [11] "Plant Pathogen" "Other Symbiotroph"
ps_FG <- phyloseq(otu_table(ps), tax_table(as.matrix(FG_tax_table)), sample_data(ps))
ps_FG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
save(ps_FG, file = 'ps_FG_with_NAs')
# lets calculate how big percentage of sequences where assigned (are not unknown)
ps_FG_RA <- transform(ps_FG, "compositional")
FG_RA_TmG <- aggregate_rare(ps_FG_RA, level = 'FUNGuild', detection = 0/100, prevalence = 0/100)
FG_RA_TmG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 12 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 12 taxa by 2 taxonomic ranks ]
glom <- tax_glom(FG_RA_TmG, taxrank = 'FUNGuild')
percentages <- psmelt(glom)
df <- percentages %>%
group_by(OTU) %>%
summarise_at(vars(Abundance), list(name = mean))
df
## # A tibble: 12 × 2
## OTU name
## <chr> <dbl>
## 1 Arbuscular Mycorrhizal 0.0240
## 2 Ectomycorrhizal 0.0305
## 3 Endophyte 0.0206
## 4 Other Pathotroph 0.0323
## 5 Other Symbiotroph 0.000308
## 6 Pathotroph-Saprotroph 0.0850
## 7 Pathotroph-Saprotroph-Symbiotroph 0.0324
## 8 Pathotroph-Symbiotroph 0.00580
## 9 Plant Pathogen 0.0225
## 10 Saprotroph 0.292
## 11 Saprotroph-Symbiotroph 0.150
## 12 Unknown 0.305
Note, 30.5% of reads were not assigned (smaller percentage than for the OTUs)
Check also the percentage of NAs within the four sample types: forest, meadow, organic and conventional. Do they differ?
FG_RA_Tm <- aggregate_rare(ps_FG_RA, level = 'trophicMode', detection = 0/100, prevalence = 0/100)
glom <- tax_glom(FG_RA_Tm, taxrank = 'trophicMode')
percentages <- psmelt(glom)
df <- percentages %>%
group_by(sample_type, OTU) %>%
summarise_at(vars(Abundance), list(name = mean))
df
## # A tibble: 32 × 3
## # Groups: sample_type [4]
## sample_type OTU name
## <fct> <chr> <dbl>
## 1 forest Pathotroph 0.0104
## 2 forest Pathotroph-Saprotroph 0.0841
## 3 forest Pathotroph-Saprotroph-Symbiotroph 0.0315
## 4 forest Pathotroph-Symbiotroph 0.000181
## 5 forest Saprotroph 0.154
## 6 forest Saprotroph-Symbiotroph 0.176
## 7 forest Symbiotroph 0.261
## 8 forest Unknown 0.283
## 9 meadow Pathotroph 0.0266
## 10 meadow Pathotroph-Saprotroph 0.0568
## # ℹ 22 more rows
management NAs Forest 28.3 meadow 23.8 organic 31.8 conventional 35.9
Here, I will check the number of reads and OTUs and construct venn-diagrams for sample types (or management type) and soil layers.
library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library("MicEco")
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA <- microbiome::transform(ps, "compositional")
meta <- meta(ps)
summarize_phyloseq(ps)
## [[1]]
## [1] "1] Min. number of reads = 44"
##
## [[2]]
## [1] "2] Max. number of reads = 410851"
##
## [[3]]
## [1] "3] Total number of reads = 11539503"
##
## [[4]]
## [1] "4] Average number of reads = 82425.0214285714"
##
## [[5]]
## [1] "5] Median number of reads = 79653.5"
##
## [[6]]
## [1] "7] Sparsity = 0.928178415470992"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
##
## [[8]]
## [1] "8] Number of singletons = 0"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 27"
##
## [[11]]
## [1] "sampleID" "plot" "sampling_position"
## [4] "depth" "depth_numerical" "vegetation"
## [7] "sample_type" "root_mgg" "pH_H2O"
## [10] "EC_uScm" "C_g_per_kg" "N_gkg"
## [13] "TP_gkg" "Alox_mmolkg" "Feox_mmolkg"
## [16] "oxides_mmolkg" "PH2O_mgkg" "Porg_mgkg"
## [19] "DOC_mgkg" "Pinorg_mgkg" "C_per_N"
## [22] "observed" "chao1" "shannon"
## [25] "observed_sng" "chao1_sng" "shannon_sng"
We obtained 11662127 fungal reads which clustered into 31714 OTUs in the 140 samples.
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
# Note, relative is false, because I am using an alredy relative abundance transformed phyloseq object.
venn_no_prev <- ps_venn(
ps_RA,
"sample_type",
fraction = 0,
weight = FALSE,
relative = FALSE,
plot = TRUE,
quantities = list(cex=0.7),
fill = MyPalette
)
venn_no_prev
How many OTUs are shared by all sample types? I will pick the shared by all from the figure and divide it with the total OTU number to get the percentage
shared_by_all <- venn_no_prev[["data"]][["original.values"]][["forest&meadow&organic&conventional"]]
total_OTUs <- 20610
shared_by_all/total_OTUs
## [1] 0.1246967
12.5% of OTUs were shared by all management
meta$new_depth <- meta$depth
meta$new_depth <- gsub("...", "-", meta$new_depth, fixed = TRUE)
meta$new_depth[meta$new_depth=="40-"] <- "40-80"
meta$new_depth[meta$new_depth=="0-10"] <- "0-10 cm"
meta$new_depth[meta$new_depth=="10-20"] <- "10-20 cm"
meta$new_depth[meta$new_depth=="20-30"] <- "20-30 cm"
meta$new_depth[meta$new_depth=="30-40"] <- "30-40 cm"
meta$new_depth[meta$new_depth=="40-80"] <- "40-80 cm"
sample_data(ps) <- sample_data(meta)
ps_RA <- transform(ps, "compositional")
# create your own color palette for sample types
MyPalette <- list(c('0-10 cm' = "#387212", '10-20 cm' = "#ADC476", '20-30 cm' = "#D8D2BA",'30-40 cm' = "#907852", '40-80 cm' = "#6A4C3A"))
venn_DEPTH <- ps_venn(
ps_RA,
"new_depth",
fraction = 0,
weight = FALSE,
relative = TRUE,
quantities = list(cex=0.7),
plot = TRUE,
fill = MyPalette[[1]]
)
venn_DEPTH
Again, how many OTUs are shared by all layers?
shared_by_all <- venn_DEPTH[["data"]][["original.values"]][["0-10 cm&10-20 cm&20-30 cm&30-40 cm&40-80 cm"]]
total_OTUs <- 20610
shared_by_all/total_OTUs
## [1] 0.04885978
4.9% of OTUs were shared by all layers
How much of the OTUs in the dataset were found in the first, first two or first three soil layers?
ps_x <- subset_samples(ps, depth!="30...40" & depth!="40...")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20130 taxa and 84 samples ]
## sample_data() Sample Data: [ 84 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20130 taxa by 7 taxonomic ranks ]
In the first three layers: 20130 OTUs
OTUs <- 20130
total_OTUs <- 20610
OTUs/total_OTUs
## [1] 0.9767103
97.7% of all OTUs were found in the first 3 soil layers
ps_x <- subset_samples(ps, depth!="20...30" & depth!="30...40" & depth!="40...")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 18392 taxa and 56 samples ]
## sample_data() Sample Data: [ 56 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 18392 taxa by 7 taxonomic ranks ]
In the first three layers: 18392 OTUs
OTUs <- 18392
total_OTUs <- 20610
OTUs/total_OTUs
## [1] 0.8923823
89.2% of all OTUs were found in the first 2 soil layers
ps_x <- subset_samples(ps, depth=="0...10")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 14737 taxa and 28 samples ]
## sample_data() Sample Data: [ 28 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 14737 taxa by 7 taxonomic ranks ]
OTUs <- 14737
total_OTUs <- 20610
OTUs/total_OTUs
## [1] 0.7150412
71.5% of all OTUs were found in the first soil layer
library(ggpubr)
fig <- ggarrange(venn_no_prev, venn_DEPTH,
ncol = 2, nrow = 1)
fig
library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library(goeveg)
library(metagMisc)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)
hist(meta$root_mgg)
#perform Shapiro-Wilk Test
shapiro.test(meta$root_mgg)
##
## Shapiro-Wilk normality test
##
## data: meta$root_mgg
## W = 0.35847, p-value < 2.2e-16
#log transformation
meta <- meta %>%
mutate(log_root = log10(root_mgg))
hist(meta$log_root)
#perform Shapiro-Wilk Test
shapiro.test(meta$log_root)
##
## Shapiro-Wilk normality test
##
## data: meta$log_root
## W = 0.98109, p-value = 0.04971
#now is normally distributed since p value is more than 0.05
sample_data(ps) <- sample_data(meta)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
save(ps, file='ps_FINAL')
ps_RA <- microbiome::transform(ps, "compositional")
To visualize beta diversity, I will do a PCoA which is metric instead of e.g. non-metric NMDS
I will be following somewhat this tutorial:
OTU = as(otu_table(ps_RA), "matrix")
# transpose if necessary
if(taxa_are_rows(ps_RA)){OTU <- t(OTU)}
# Coerce to data.frame
OTU = as.data.frame(OTU)
OTU <- as.matrix(OTU)
bray_dist <- vegan::vegdist(OTU, method="bray")
str(bray_dist)
## 'dist' Named num [1:9730] 0.367 0.694 0.967 0.723 0.47 ...
## - attr(*, "maxdist")= num 1
## - attr(*, "Size")= int 140
## - attr(*, "Labels")= chr [1:140] "CG9.1_0to10" "CG9.1_10to20" "CG9.1_20to30" "CG9.1_30to40" ...
## - attr(*, "Diag")= logi FALSE
## - attr(*, "Upper")= logi FALSE
## - attr(*, "method")= chr "bray"
## - attr(*, "call")= language vegan::vegdist(x = OTU, method = "bray")
# use k = 3 so we'll get calculations for three axes
pcoa <- cmdscale(bray_dist, eig=TRUE, k = 3)
ordiplot(scores(pcoa),display="sites", type="points")
Let’s first make PCoA ordination with axes 1 and 2, and later for 1 and 3.
# Post-Hoc Projections of environmental variables
# envfit in vegan projects points onto vectors that have maximum correlation with corresponding environmental variables
pcoa.env12 <- envfit(pcoa, meta[ ,c("pH_H2O", "C_g_per_kg", "N_gkg", "TP_gkg", "sample_type", "depth_numerical", "DOC_mgkg", "Pinorg_mgkg", "Porg_mgkg", "log_root", "C_per_N", "Feox_mmolkg", "Alox_mmolkg")], na.rm = TRUE, choices=c(1:2), permutations = 999)
# main effects
pcoa.env12
##
## ***VECTORS
##
## Dim1 Dim2 r2 Pr(>r)
## pH_H2O 0.89803 -0.43993 0.4996 0.001 ***
## C_g_per_kg -0.92647 0.37636 0.5577 0.001 ***
## N_gkg -0.95072 0.31005 0.5692 0.001 ***
## TP_gkg -0.93787 -0.34699 0.5026 0.001 ***
## depth_numerical 0.98087 0.19465 0.5300 0.001 ***
## DOC_mgkg -0.76599 0.64286 0.4138 0.001 ***
## Pinorg_mgkg 0.17893 -0.98386 0.0740 0.010 **
## Porg_mgkg -0.99894 0.04605 0.5753 0.001 ***
## log_root -0.80130 0.59827 0.4884 0.001 ***
## C_per_N -0.97551 0.21996 0.4224 0.001 ***
## Feox_mmolkg -0.84349 0.53715 0.4468 0.001 ***
## Alox_mmolkg -0.67701 0.73598 0.3176 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## Dim1 Dim2
## sample_typeforest 0.0679 0.1867
## sample_typemeadow -0.0229 0.1780
## sample_typeorganic -0.0098 -0.1293
## sample_typeconventional 0.0006 -0.1050
##
## Goodness of fit:
## r2 Pr(>r)
## sample_type 0.2013 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## 1 observation deleted due to missingness
ef12.adj <- pcoa.env12
pvals.adj <- p.adjust (pcoa.env12$vectors$pvals, method = 'bonferroni')
ef12.adj$vectors$pvals <- pvals.adj
ef12.adj
##
## ***VECTORS
##
## Dim1 Dim2 r2 Pr(>r)
## pH_H2O 0.89803 -0.43993 0.4996 0.012 *
## C_g_per_kg -0.92647 0.37636 0.5577 0.012 *
## N_gkg -0.95072 0.31005 0.5692 0.012 *
## TP_gkg -0.93787 -0.34699 0.5026 0.012 *
## depth_numerical 0.98087 0.19465 0.5300 0.012 *
## DOC_mgkg -0.76599 0.64286 0.4138 0.012 *
## Pinorg_mgkg 0.17893 -0.98386 0.0740 0.120
## Porg_mgkg -0.99894 0.04605 0.5753 0.012 *
## log_root -0.80130 0.59827 0.4884 0.012 *
## C_per_N -0.97551 0.21996 0.4224 0.012 *
## Feox_mmolkg -0.84349 0.53715 0.4468 0.012 *
## Alox_mmolkg -0.67701 0.73598 0.3176 0.012 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## Dim1 Dim2
## sample_typeforest 0.0679 0.1867
## sample_typemeadow -0.0229 0.1780
## sample_typeorganic -0.0098 -0.1293
## sample_typeconventional 0.0006 -0.1050
##
## Goodness of fit:
## r2 Pr(>r)
## sample_type 0.2013 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## 1 observation deleted due to missingness
To plot (classical) mds (which is equivalent to PCoA) with ggplot a new datasheet needs to be created which contains the x,y points for each site. You can do this by calling the scores of you mds.
site.scrs <- as.data.frame(scores(pcoa, display = "sites")) #save pcoa results into dataframe
site.scrs <- cbind(site.scrs, soil_type = meta$sample_type) #add grouping variable "soil_type" to dataframe
site.scrs <- cbind(site.scrs, depth = meta$depth) #add grouping variable of depth
head(site.scrs)
## Dim1 Dim2 Dim3 soil_type depth
## CG9.1_0to10 -0.32486951 -0.16939677 0.057117223 conventional 0...10
## CG9.1_10to20 -0.35683666 -0.18451022 0.010039481 conventional 10...20
## CG9.1_20to30 0.10483906 -0.26374886 -0.239911662 conventional 20...30
## CG9.1_30to40 0.49569905 -0.18888465 -0.132639765 conventional 30...40
## CG9.1_40to70 0.03411974 -0.01592131 -0.009914955 conventional 40...
## CG9.2_0to10 -0.24690800 -0.12969297 0.145831464 conventional 0...10
I will be following somewhat this tutorial for fitting the environmental variables etc:
To show environmental extrinsic variables another datasheet needs to be created
Citation from the jkzorz github
“Extracting the required information from the envfit result is a bit more complicated. The envfit output contains information on the length of the segments for each variable. The segments are scaled to the r2 value, so that the environmental variables with a longer segment are more strongly correlated with the data than those with a shorter segment. You can extract this information with scores. Then these lengths are further scaled to fit the plot. This is done with a multiplier that is analysis specific, and can be accessed using the command ordiArrowMul(en). Below I multiply the scores by this multiplier to keep the coordinates in the correct proportion.”
Because my data contained continuous and categorical environmental variables, Im extracting the information from both separately using the vectors and factors options respectively.
# first categorial variables
env.scores_cat12 <- as.data.frame(scores(pcoa.env12, display = "factors"))*0.25 # multiply the envfit scores to keep them in the frame of the ordination
env.scores_cat12 <- cbind(env.scores_cat12, env.variables = rownames(env.scores_cat12)) #and then gives them their names
env.scores_cat12 <- cbind(env.scores_cat12, pval = pcoa.env12$factors$pvals) # add pvalues to dataframe
sig.env.scores_cat12 <- subset(env.scores_cat12, pval<=0.05) #subset data to show variables significant at 0.05
sig.env.scores_cat12
## Dim1 Dim2 env.variables pval
## sample_typeforest 0.0169631533 0.04667273 sample_typeforest 0.001
## sample_typemeadow -0.0057255852 0.04448833 sample_typemeadow 0.001
## sample_typeorganic -0.0024526659 -0.03233332 sample_typeorganic 0.001
## sample_typeconventional 0.0001453261 -0.02625329 sample_typeconventional 0.001
# all were significant
# then continous variables
env.scores_cont12 <- as.data.frame(scores(pcoa.env12, display = "vectors"))*0.25 # multiply the envfit scores to keep them in the frame of the ordination
env.scores_cont12 <- cbind(env.scores_cont12, env.variables = rownames(env.scores_cont12)) #and then gives them their names
env.scores_cont12 <- cbind(env.scores_cont12, pval = pcoa.env12$vectors$pvals) # add pvalues to dataframe
sig.env.scores_cont12 <- subset(env.scores_cont12, pval<=0.05) #subset data to show variables significant at 0.05
sig.env.scores_cont12
## Dim1 Dim2 env.variables pval
## pH_H2O 0.15868133 -0.077735176 pH_H2O 0.001
## C_g_per_kg -0.17297071 0.070265684 C_g_per_kg 0.001
## N_gkg -0.17931771 0.058479126 N_gkg 0.001
## TP_gkg -0.16622910 -0.061501047 TP_gkg 0.001
## depth_numerical 0.17852191 0.035426555 depth_numerical 0.001
## DOC_mgkg -0.12318024 0.103379224 DOC_mgkg 0.001
## Pinorg_mgkg 0.01216617 -0.066897767 Pinorg_mgkg 0.010
## Porg_mgkg -0.18942328 0.008732727 Porg_mgkg 0.001
## log_root -0.14000008 0.104527314 log_root 0.001
## C_per_N -0.15851066 0.035740927 C_per_N 0.001
## Feox_mmolkg -0.14095001 0.089759046 Feox_mmolkg 0.001
## Alox_mmolkg -0.09537901 0.103687400 Alox_mmolkg 0.001
# all were significant
A new dataset containing species data also needs to be made to look at species vectors.
# wascores computes Weighted Averages scores of species for ordination configuration or for environmental variables.
species.scores12 <- wascores(pcoa$points[,1:2], OTU)
species.scores13 <- wascores(pcoa$points[,1:3], OTU)
# Ordiselect gives me more control for the significant OTUs to display
# I'm using 0.1 % of the most abundant and 100 % of the best fitting OTUs
# NOTE! the higher the ablim maybe more of the low diversity sample' OTUs shown..?
# The species scores of the 0.1% most abundant and 100% of the best fitting OTUs
ordis12 <- ordiselect(OTU, species.scores12, ablim = 0.001, fitlim = 1, choices = c(1,2), method = "axes", env = pcoa.env12)
## [1] "21 species selected (0.1% of total number of species)."
## [1] "All species selected which belong to the 0.1% most abundant species."
ordis12.species.scores <- species.scores12[ordis12, ]
ordis12.species.scores <- cbind(ordis12.species.scores, Species = rownames(ordis12.species.scores)) #add species names to dataframe
# OTUs into species names
OTU.sp <- as.data.frame(tax_table(ps_RA))
OTU.sp$OTU <- rownames(OTU.sp)
# remove other taxa
OTU.sp <- OTU.sp[ , -(1:6)]
try <- merge(data.frame(ordis12.species.scores), data.frame(OTU.sp),
by = 0, all = FALSE)
#add row names
samp2 <- try[,-1]
rownames(samp2) <- try[,1]
try <- samp2
rm(samp2)
# remove the "Species"
try <- try[ , -(3)]
# remove "try"
ordis12.species.scores <- try
rm(try)
head(ordis12.species.scores)
## V1 V2 species
## OTU12776 0.126946387694976 -0.121543372900751 Clonostachys_rosea
## OTU139 0.200755588616414 0.046445493642642 Entomortierella_parvispora
## OTU13985 -0.237582025036352 0.0288145443505883 Saitozyma_podzolica
## OTU19296 -0.295916773006835 -0.149458330721953 Cladorrhinum_unclassified
## OTU20886 -0.25280082681134 -0.121756560222692 Paraphaeosphaeria_unclassified
## OTU23599 -0.22805912031261 0.169675633910535 Paraphaeosphaeria_viciae
## OTU
## OTU12776 OTU12776
## OTU139 OTU139
## OTU13985 OTU13985
## OTU19296 OTU19296
## OTU20886 OTU20886
## OTU23599 OTU23599
# make values numeric
ordis12.species.scores$V1 <- as.numeric(ordis12.species.scores$V1)
ordis12.species.scores$V2 <- as.numeric(ordis12.species.scores$V2)
Now we have the relevant information for plotting the ordination in ggplot
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
site.scrs$new_depth <- site.scrs$depth
site.scrs$new_depth <- gsub("...", "-", site.scrs$new_depth, fixed = TRUE)
site.scrs$new_depth[site.scrs$new_depth=="40-"] <- "40-80"
site.scrs$new_depth[site.scrs$new_depth=="0-10"] <- "0-10 cm"
site.scrs$new_depth[site.scrs$new_depth=="10-20"] <- "10-20 cm"
site.scrs$new_depth[site.scrs$new_depth=="20-30"] <- "20-30 cm"
site.scrs$new_depth[site.scrs$new_depth=="30-40"] <- "30-40 cm"
site.scrs$new_depth[site.scrs$new_depth=="40-80"] <- "40-80 cm"
#First lets run the PCoA with phyloseq to get the axis percentages (plot_ordination gives axis percentages)
GP.ord <- ordinate(ps_RA, "PCoA", "bray")
p2 = plot_ordination(ps_RA, GP.ord, type="samples", color="sample_type", shape="depth")
p2
Remember to change the axis percentages accordingly below!!!
pcoa.plot <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim2, colour = factor(site.scrs$soil_type), shape = factor(site.scrs$new_depth)), size = 6, alpha = 0.6, stroke = 1.5) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.text = element_text(size = 12), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC2 (8.0%)", x = "PC1 (20.0%)")
pcoa.plot
First, modify the species names
unique(ordis12.species.scores$species)
## [1] "Clonostachys_rosea" "Entomortierella_parvispora"
## [3] "Saitozyma_podzolica" "Cladorrhinum_unclassified"
## [5] "Paraphaeosphaeria_unclassified" "Paraphaeosphaeria_viciae"
## [7] "Solicoccozyma_terricola" "Pseudeurotium_unclassified"
## [9] "Pseudeurotium_hygrophilum" "Clavulina_cinerea"
## [11] "Fusarium_asiaticum" "Pseudogymnoascus_unclassified"
## [13] "Pleotrichocladium_opacum" "Leotiomycetes_unclassified"
## [15] "Rhexocercosporidium_panacis" "Helotiales_unclassified"
## [17] "Pseudogymnoascus_roseus" "Solicoccozyma_terrea"
## [19] "Mortierella_antarctica" "Glutinoglossum_heptaseptatum"
I want to remove the “unclassified” from the end
ordis12.species.scores$species <- gsub("_unclassified","",as.character(ordis12.species.scores$species))
pcoa.plot_OTU <- pcoa.plot + ggrepel::geom_text_repel(data = ordis12.species.scores, aes(x=V1,y=V2,label=species), alpha=0.7, cex = 3.5, direction = "both", segment.size = 0.2, max.overlaps = Inf) + theme(legend.position = c(0.86, 0.8), legend.text = element_text(size = 12))
pcoa.plot_OTU
First, simplify the names
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "pH_H2O"] <- "pH"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "C_g_per_kg"] <- "C"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "N_gkg"] <- "N"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "TP_gkg"] <- "P-tot"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "depth_numerical"] <- "depth"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "DOC_mgkg"] <- "DOC"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Pinorg_mgkg"] <- "P-inorg"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Porg_mgkg"] <- "P-org"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "log_root"] <- "log root"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "C_per_N"] <- "C/N"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Feox_mmolkg"] <- "Fe-ox"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Alox_mmolkg"] <- "Al-ox"
…then plot
p1 <- pcoa.plot_OTU +
geom_segment(aes(x = 0, y = 0, xend = Dim1, yend = Dim2), data = sig.env.scores_cont12, size =1, alpha = 0.5, colour = "grey30") + ggrepel::geom_text_repel(data = sig.env.scores_cont12, aes(x = Dim1, y = Dim2), colour = "blue", fontface = "bold", label = sig.env.scores_cont12$env.variables, segment.size = 0.2, box.padding = unit(0.1, "lines"), point.padding = (0.1), force = 1, max.time = 30, nudge_y = 0.00, nudge_x = 0.00)
p1
This was saved with width 1000 and height 700
pcoa.env13 <- envfit(pcoa, meta[ ,c("pH_H2O", "C_g_per_kg", "N_gkg", "TP_gkg", "sample_type", "depth_numerical", "DOC_mgkg", "Pinorg_mgkg", "Porg_mgkg", "log_root", "C_per_N", "Feox_mmolkg", "Alox_mmolkg")], na.rm = TRUE, choices=c(1:3), permutations = 999)
pcoa.env13
##
## ***VECTORS
##
## Dim1 Dim2 Dim3 r2 Pr(>r)
## pH_H2O 0.71582 -0.34967 0.60442 0.5947 0.001 ***
## C_g_per_kg -0.70297 0.28453 -0.65182 0.6894 0.001 ***
## N_gkg -0.74553 0.24218 -0.62092 0.6801 0.001 ***
## TP_gkg -0.91468 -0.33863 -0.22064 0.5108 0.001 ***
## depth_numerical 0.82942 0.16519 0.53364 0.5935 0.001 ***
## DOC_mgkg -0.57514 0.48141 -0.66141 0.5386 0.001 ***
## Pinorg_mgkg 0.12414 -0.67859 0.72395 0.1308 0.002 **
## Porg_mgkg -0.84201 0.03811 -0.53812 0.6442 0.001 ***
## log_root -0.70696 0.52696 -0.47172 0.5404 0.001 ***
## C_per_N -0.78189 0.17543 -0.59823 0.4935 0.001 ***
## Feox_mmolkg -0.67315 0.42760 -0.60335 0.5371 0.001 ***
## Alox_mmolkg -0.40124 0.43448 -0.80637 0.5726 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## Dim1 Dim2 Dim3
## sample_typeforest 0.0679 0.1867 0.1190
## sample_typemeadow -0.0229 0.1780 -0.0916
## sample_typeorganic -0.0098 -0.1293 0.0392
## sample_typeconventional 0.0006 -0.1050 0.0041
##
## Goodness of fit:
## r2 Pr(>r)
## sample_type 0.1997 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## 1 observation deleted due to missingness
ef13.adj <- pcoa.env13
pvals.adj <- p.adjust (pcoa.env13$vectors$pvals, method = 'bonferroni')
ef13.adj$vectors$pvals <- pvals.adj
ef13.adj
##
## ***VECTORS
##
## Dim1 Dim2 Dim3 r2 Pr(>r)
## pH_H2O 0.71582 -0.34967 0.60442 0.5947 0.012 *
## C_g_per_kg -0.70297 0.28453 -0.65182 0.6894 0.012 *
## N_gkg -0.74553 0.24218 -0.62092 0.6801 0.012 *
## TP_gkg -0.91468 -0.33863 -0.22064 0.5108 0.012 *
## depth_numerical 0.82942 0.16519 0.53364 0.5935 0.012 *
## DOC_mgkg -0.57514 0.48141 -0.66141 0.5386 0.012 *
## Pinorg_mgkg 0.12414 -0.67859 0.72395 0.1308 0.024 *
## Porg_mgkg -0.84201 0.03811 -0.53812 0.6442 0.012 *
## log_root -0.70696 0.52696 -0.47172 0.5404 0.012 *
## C_per_N -0.78189 0.17543 -0.59823 0.4935 0.012 *
## Feox_mmolkg -0.67315 0.42760 -0.60335 0.5371 0.012 *
## Alox_mmolkg -0.40124 0.43448 -0.80637 0.5726 0.012 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## Dim1 Dim2 Dim3
## sample_typeforest 0.0679 0.1867 0.1190
## sample_typemeadow -0.0229 0.1780 -0.0916
## sample_typeorganic -0.0098 -0.1293 0.0392
## sample_typeconventional 0.0006 -0.1050 0.0041
##
## Goodness of fit:
## r2 Pr(>r)
## sample_type 0.1997 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## 1 observation deleted due to missingness
# first categorial variables
env.scores_cat13 <- as.data.frame(scores(pcoa.env13, display = "factors"))*0.25
env.scores_cat13 <- cbind(env.scores_cat13, env.variables = rownames(env.scores_cat13)) #and then gives them their names
env.scores_cat13 <- cbind(env.scores_cat13, pval = pcoa.env13$factors$pvals) # add pvalues to dataframe
sig.env.scores_cat13 <- subset(env.scores_cat13, pval<=0.05) #subset data to show variables significant at 0.05
# then continous variables
env.scores_cont13 <- as.data.frame(scores(pcoa.env13, display = "vectors"))*0.25
env.scores_cont13 <- cbind(env.scores_cont13, env.variables = rownames(env.scores_cont13)) #and then gives them their names
env.scores_cont13 <- cbind(env.scores_cont13, pval = pcoa.env13$vectors$pvals) # add pvalues to dataframe
sig.env.scores_cont13 <- subset(env.scores_cont13, pval<=0.05) #subset data to show variables significant at 0.05
# The species scores of the 0.1% most abundant and 100% of the best fitting OTUs
ordis13 <- ordiselect(OTU, species.scores13, ablim = 0.001, fitlim = 1, choices = c(1,3), method = "axes", env = pcoa.env13)
## [1] "21 species selected (0.1% of total number of species)."
## [1] "All species selected which belong to the 0.1% most abundant species."
ordis13.species.scores <- species.scores13[ordis13, ]
ordis13.species.scores <- cbind(ordis13.species.scores, Species = rownames(ordis13.species.scores)) #add species names to dataframe
# change into sp names
try <- merge(data.frame(ordis13.species.scores), data.frame(OTU.sp),
by = 0, all = FALSE)
#add row names
samp2 <- try[,-1]
rownames(samp2) <- try[,1]
try <- samp2
rm(samp2)
# remove the "Species"
try <- try[ , -c(2, 4)]
# remove "try"
ordis13.species.scores <- try
rm(try)
head(ordis13.species.scores)
## V1 V3 species
## OTU12776 0.126946387694976 -0.00534028987033588 Clonostachys_rosea
## OTU139 0.200755588616414 -0.0370071455091096 Entomortierella_parvispora
## OTU13985 -0.237582025036352 -0.0956487846132982 Saitozyma_podzolica
## OTU19296 -0.295916773006835 0.120579023660146 Cladorrhinum_unclassified
## OTU20886 -0.25280082681134 0.026574191820344 Paraphaeosphaeria_unclassified
## OTU23599 -0.22805912031261 -0.175344391490801 Paraphaeosphaeria_viciae
## OTU
## OTU12776 OTU12776
## OTU139 OTU139
## OTU13985 OTU13985
## OTU19296 OTU19296
## OTU20886 OTU20886
## OTU23599 OTU23599
ordis13.species.scores$V1 <- as.numeric(ordis13.species.scores$V1)
ordis13.species.scores$V3 <- as.numeric(ordis13.species.scores$V3)
#First lets run the PCoA in with phyloseq to get the axis percentages
GP.ord <- ordinate(ps_RA, "PCoA", "bray", k = 3)
p2 = plot_ordination(ps_RA, GP.ord, type="samples", axes = c(1, 3), color="sample_type", shape="depth")
p2
Remember to change the axis percentages accordingly!!
pcoa.plot <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim3, colour = factor(site.scrs$soil_type), shape = factor(site.scrs$depth)), size = 6, alpha = 0.6, stroke = 1.5) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.position = "right", legend.text = element_text(size = 12), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC3 (5.8%)", x = "PC1 (20.0%)")
pcoa.plot
# modify the species names
ordis13.species.scores$species <- gsub("_unclassified","",as.character(ordis13.species.scores$species))
pcoa.plot_OTU <- pcoa.plot + ggrepel::geom_text_repel(data = ordis13.species.scores, aes(x=V1,y=V3,label=species), alpha=0.7, cex = 3.5, direction = "both", segment.size = 0.2, max.overlaps = Inf) + theme(legend.text = element_text(size = 12)) + theme(legend.position = c(0.85, 0.8), legend.text = element_text(size = 12))# + theme(legend.position="none")# if problems, this might help
pcoa.plot_OTU
In the final figure I will not have the env. variables, but let’s see how it looks anyhow
pcoa.plot_OTU +
geom_segment(aes(x = 0, y = 0, xend = Dim1, yend = Dim3), data = sig.env.scores_cont13, size =1, alpha = 0.5, colour = "grey30") + ggrepel::geom_text_repel(data = sig.env.scores_cont13, aes(x = Dim1, y = Dim3), colour = "blue", fontface = "bold", label = sig.env.scores_cont13$env.variables, max.overlaps=Inf, direction = "y", segment.size = 0.2, box.padding = unit(0.5, "lines"), point.padding = (1), force = 1, max.time = 30, nudge_y = 0.01, nudge_x = 0.01) + theme(legend.position = c(0.91, 0.8), legend.text = element_text(size = 12)) #+ theme(legend.position = "none")
Here I do permutational analysis of variance or PERMANOVA. With PERMANOVA, I want to check how much the main treatment factors, management type (here sample_type) and soil layer (depth), are responsible for differences in fungal communities. In addition, I will check how soil layers differ within management type (4.5) and in which soil layers we see a management type effect (4.6)
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("pairwiseAdonis")
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata
ps_RA <- microbiome::transform(ps, "compositional")
otu <- abundances(ps_RA)
meta <- meta(ps_RA)
ps_RA_bray <- phyloseq::distance(ps_RA, method = "bray")
Check that variance homogeneity assumptions hold (to ensure the reliability of the results). If groups have signif. different spreads the permanova result may be potentially explained by that, rtaher than the groups.
Betadisper first calculates the average distance of group members to the group centroid in multivariate space (generated by a distance matrix). Then, an ANOVA is done to test if the dispersions (variances) of groups are different.
anova(betadisper(ps_RA_bray, meta$sample_type))
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 3 0.05437 0.018125 1.247 0.2953
## Residuals 136 1.97679 0.014535
We see that the ANOVA p-value is not significant meaning that the homogeneity of variance assumption is met
anova(betadisper(ps_RA_bray, meta$depth))
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 4 0.41023 0.102556 9.276 1.184e-06 ***
## Residuals 135 1.49258 0.011056
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We see that the ANOVA p-value is highly significant meaning that homogeneity of variance assumption is NOT met
I’ll do post hoc analysis with Tukey’s test to see which groups differ in relation to their variances
TukeyHSD(betadisper(ps_RA_bray, meta$depth))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## 10...20-0...10 -0.028389112 -0.106091062 0.04931284 0.8502993
## 20...30-0...10 0.041806442 -0.035895508 0.11950839 0.5722606
## 30...40-0...10 0.051717780 -0.025984170 0.12941973 0.3549246
## 40...-0...10 0.130756030 0.053054080 0.20845798 0.0000749
## 20...30-10...20 0.070195554 -0.007506396 0.14789750 0.0971647
## 30...40-10...20 0.080106892 0.002404942 0.15780884 0.0397902
## 40...-10...20 0.159145142 0.081443192 0.23684709 0.0000009
## 30...40-20...30 0.009911338 -0.067790612 0.08761329 0.9966478
## 40...-20...30 0.088949588 0.011247638 0.16665154 0.0161603
## 40...-30...40 0.079038250 0.001336300 0.15674020 0.0440806
Dispersions differ significantly between 40… and all other, and between 30…40 and 10…20
The latter is not a problem at all, because I am not interested of comparing layers if they are not consecutive, but I will keep in mind, that the consecutive layers 30-40 cmd and 40-80 cm do not have similar dispersions.
First, I will do PERMANOVA so that I include all management types (later without forest)
# first with just soil type and strata option
adonis2(formula = ps_RA_bray~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks: strata
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
## Df SumOfSqs R2 F Pr(>F)
## sample_type 3 5.591 0.10399 5.2615 1e-04 ***
## Residual 136 48.175 0.89601
## Total 139 53.766 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# then with just depth and strata option
adonis2(formula = ps_RA_bray~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks: strata
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
## Df SumOfSqs R2 F Pr(>F)
## depth 4 9.705 0.1805 7.4335 1e-04 ***
## Residual 135 44.061 0.8195
## Total 139 53.766 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Depth has a larger effect. So, let’s put it first in the model
For the full model it matters which “by” option we choose. When by=“terms” significance for each term is calculated sequentially from first to last, so that the order of terms matter. We will use this, because with sequential analysis we will get R2 values that sum up to 1, and will also get the significance and R2 values for each interaction term separately rather than for the interaction alone.
final <- adonis2(formula = ps_RA_bray ~ depth * sample_type, data = meta, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ depth * sample_type, data = meta, permutations = 9999, method = "bray", by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## depth 4 9.705 0.18050 9.1871 1e-04 ***
## sample_type 3 5.591 0.10399 7.0576 1e-04 ***
## depth:sample_type 12 6.780 0.12611 2.1396 1e-04 ***
## Residual 120 31.690 0.58940
## Total 139 53.766 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#write.csv2(final, "\C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile\\permanova_soiltype_and_depth.csv")
I will not use this, rather the one above with forest
# subset samples
ps_RA <- microbiome::transform(ps, "compositional")
ps_x <- subset_samples(ps_RA, sample_type != "forest")
meta_subset <- meta(ps_x)
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19820 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 19820 taxa by 7 taxonomic ranks ]
b <- phyloseq::distance(ps_x, method = "bray")
set.seed(777)
final <- adonis2(formula = b ~ depth*sample_type, data = meta_subset, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = b ~ depth * sample_type, data = meta_subset, permutations = 9999, method = "bray", by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## depth 4 9.800 0.21235 9.7433 1e-04 ***
## sample_type 2 3.927 0.08509 7.8083 1e-04 ***
## depth:sample_type 8 4.763 0.10320 2.3676 1e-04 ***
## Residual 110 27.660 0.59936
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Pairwise PERMANOVA I will only do so that forest is excluded due to too few replicates for forest. But I will not use these result, this is just to check
set.seed(777)
pair.mod<-pairwise.adonis(b, factors=meta_subset$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 2.351620 6.722317 0.07492358 0.001 0.003
## 2 conventional vs organic 1 1.000537 2.947677 0.03429618 0.010 0.030
## 3 meadow vs organic 1 2.574066 7.371501 0.08634616 0.001 0.003
## sig
## 1 *
## 2 .
## 3 *
Each treatment differ from the other treatments at the 0.05 confidence level.
I will do pairwise permanova analysis of depth for all management types separately, except for forest which has too few replicates
# subset samples
x <- "meadow"
ps_RA_subset <- subset_samples(ps_RA, sample_type == x)
meta_subset <- subset(meta, sample_type == x)
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 11032 taxa and 40 samples ]
## sample_data() Sample Data: [ 40 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 11032 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$depth)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 0...10 vs 10...20 1 0.6686591 4.418415 0.2398912 0.001 0.01 *
## 2 0...10 vs 20...30 1 1.2787641 6.259368 0.3089617 0.001 0.01 *
## 3 0...10 vs 30...40 1 2.1571845 9.795664 0.4116575 0.001 0.01 *
## 4 0...10 vs 40... 1 1.6755335 5.447545 0.2801148 0.002 0.02 .
## 5 10...20 vs 20...30 1 0.3728923 1.802396 0.1140584 0.048 0.48
## 6 10...20 vs 30...40 1 1.6735250 7.511019 0.3491708 0.001 0.01 *
## 7 10...20 vs 40... 1 1.5401662 4.965605 0.2618216 0.002 0.02 .
## 8 20...30 vs 30...40 1 0.9160107 3.321638 0.1917624 0.001 0.01 *
## 9 20...30 vs 40... 1 1.0302087 2.837037 0.1684998 0.001 0.01 *
## 10 30...40 vs 40... 1 0.7004325 1.847860 0.1166000 0.012 0.12
# subset samples
x <- "organic"
ps_RA_subset <- subset_samples(ps_RA, sample_type == x)
meta_subset <- subset(meta, sample_type == x)
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 14151 taxa and 40 samples ]
## sample_data() Sample Data: [ 40 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 14151 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$depth)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 0...10 vs 10...20 1 0.1366228 0.906114 0.06078808 0.443 1.00
## 2 0...10 vs 20...30 1 0.4849847 2.278740 0.13998260 0.007 0.07
## 3 0...10 vs 30...40 1 2.0284059 8.323851 0.37286806 0.001 0.01 *
## 4 0...10 vs 40... 1 2.3642953 11.171511 0.44381567 0.002 0.02 .
## 5 10...20 vs 20...30 1 0.4214687 2.119591 0.13149163 0.016 0.16
## 6 10...20 vs 30...40 1 2.1185424 9.223078 0.39715141 0.001 0.01 *
## 7 10...20 vs 40... 1 2.4397574 12.343810 0.46856585 0.002 0.02 .
## 8 20...30 vs 30...40 1 1.0503864 3.600278 0.20455802 0.001 0.01 *
## 9 20...30 vs 40... 1 1.4017114 5.397392 0.27825350 0.001 0.01 *
## 10 30...40 vs 40... 1 0.5095377 1.753656 0.11131738 0.041 0.41
# subset samples
x <- "conventional"
ps_RA_subset <- subset_samples(ps_RA, sample_type == x)
meta_subset <- subset(meta, sample_type == x)
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 13863 taxa and 45 samples ]
## sample_data() Sample Data: [ 45 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 13863 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$depth)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 0...10 vs 10...20 1 0.1170223 0.8087735 0.04811615 0.757 1.00
## 2 0...10 vs 20...30 1 0.7762987 3.5720248 0.18250665 0.003 0.03 .
## 3 0...10 vs 30...40 1 2.1200130 8.7764519 0.35422553 0.001 0.01 *
## 4 0...10 vs 40... 1 1.6494331 5.3721690 0.25136283 0.001 0.01 *
## 5 10...20 vs 20...30 1 0.7642509 4.0496264 0.20198014 0.007 0.07
## 6 10...20 vs 30...40 1 2.2368859 10.5042323 0.39632283 0.001 0.01 *
## 7 10...20 vs 40... 1 1.8451677 6.6271158 0.29288381 0.001 0.01 *
## 8 20...30 vs 30...40 1 0.6735640 2.3585234 0.12847021 0.025 0.25
## 9 20...30 vs 40... 1 0.7552291 2.1512623 0.11851861 0.013 0.13
## 10 30...40 vs 40... 1 0.4996687 1.3314104 0.07682066 0.105 1.00
I will analyse these without forest as forest has too few replicates
# define soil layer to be analysed
x <- "0...10"
# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 13638 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 13638 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 1.5452073 9.547816 0.3889477 0.001 0.003
## 2 conventional vs organic 1 0.7360756 4.347365 0.2247006 0.001 0.003
## 3 meadow vs organic 1 1.5542597 9.915270 0.4146000 0.002 0.006
## sig
## 1 *
## 2 *
## 3 *
# subset samples
x <- "10...20"
# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 15128 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 15128 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 1.3202292 9.871317 0.3968956 0.001 0.003
## 2 conventional vs organic 1 0.6464922 5.141142 0.2552557 0.001 0.003
## 3 meadow vs organic 1 1.3555168 9.325285 0.3997930 0.002 0.006
## sig
## 1 *
## 2 *
## 3 *
# subset samples
x <- "20...30"
# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 14304 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 14304 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 0.8814463 3.381681 0.1839702 0.001 0.003
## 2 conventional vs organic 1 0.4094517 1.567927 0.0946363 0.114 0.342
## 3 meadow vs organic 1 0.8348207 3.206261 0.1863427 0.002 0.006
## sig
## 1 *
## 2
## 3 *
# subset samples
x <- "30...40"
# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 5256 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 5256 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 0.7196829 2.3881246 0.13734228 0.006 0.018
## 2 conventional vs organic 1 0.2610497 0.8266673 0.05223256 0.707 1.000
## 3 meadow vs organic 1 0.5459411 1.7774404 0.11265708 0.012 0.036
## sig
## 1 .
## 2
## 3 .
# subset samples
x <- "40..."
# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2487 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 2487 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 0.4663823 1.030147 0.06426309 0.353 1.000
## 2 conventional vs organic 1 0.7337456 2.062743 0.12089163 0.011 0.033
## 3 meadow vs organic 1 1.0846039 2.992358 0.17610025 0.002 0.006
## sig
## 1
## 2 .
## 3 *
PERMANOVA with soil properties will be done with only meadow, organic and conventional treatments excluding forest
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("pairwiseAdonis")
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata
ps_RA <- microbiome::transform(ps, "compositional")
ps_RA_nf <- subset_samples(ps_RA, sample_type!="forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19820 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 19820 taxa by 7 taxonomic ranks ]
otu <- abundances(ps_RA_nf)
meta <- meta(ps_RA_nf)
Note: adonis cannot handle or account for NA or blanks in your data. Use na.omit(meta) %>% before each run where such variables are used where NAs exist
ps_RA_bray <- phyloseq::distance(ps_RA_nf, method = "bray")
I will use the following soil properties
“log_root”
“pH_H2O”
“C_g_per_kg”
“N_gkg”
“TP_gkg”
“Alox_mmolkg”
“Feox_mmolkg”
“PH2O_mgkg”
“Porg_mgkg”
“DOC_mgkg”
“Pinorg_mgkg”
“C_per_N”
final <- adonis2(formula = ps_RA_bray ~ log_root, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ log_root, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 5.369 0.11634 16.194 1e-04 ***
## Residual 123 40.780 0.88366
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ pH_H2O, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ pH_H2O, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 6.041 0.1309 18.526 1e-04 ***
## Residual 123 40.108 0.8691
## Total 124 46.149 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ C_g_per_kg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ C_g_per_kg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 6.482 0.14046 20.1 1e-04 ***
## Residual 123 39.667 0.85954
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ N_gkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ N_gkg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 6.647 0.14404 20.698 1e-04 ***
## Residual 123 39.502 0.85596
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ TP_gkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ TP_gkg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 5.559 0.12046 16.846 1e-04 ***
## Residual 123 40.590 0.87954
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ Alox_mmolkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ Alox_mmolkg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 3.713 0.08045 10.762 1e-04 ***
## Residual 123 42.437 0.91955
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ Feox_mmolkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ Feox_mmolkg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 4.853 0.10517 14.456 1e-04 ***
## Residual 123 41.296 0.89483
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ DOC_mgkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ DOC_mgkg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 5.300 0.11484 15.957 1e-04 ***
## Residual 123 40.850 0.88516
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ Pinorg_mgkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ Pinorg_mgkg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.699 0.01515 1.8918 0.0305 *
## Residual 123 45.450 0.98485
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ C_per_N, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ C_per_N, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Model 1 4.909 0.10637 14.641 1e-04 ***
## Residual 123 41.240 0.89363
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
colSums(is.na(meta))
## sampleID plot sampling_position depth
## 0 0 0 0
## depth_numerical vegetation sample_type root_mgg
## 0 0 0 0
## pH_H2O EC_uScm C_g_per_kg N_gkg
## 0 0 0 0
## TP_gkg Alox_mmolkg Feox_mmolkg oxides_mmolkg
## 0 0 0 0
## PH2O_mgkg Porg_mgkg DOC_mgkg Pinorg_mgkg
## 1 1 0 0
## C_per_N observed chao1 shannon
## 0 0 0 0
## observed_sng chao1_sng shannon_sng log_root
## 0 0 0 0
These are NA:
Porg_mgkg for sample NG2A2_30to40
PH2O_mgkg for sample NG2B3_0to10
m <- meta %>% drop_na(Porg_mgkg)
x <- subset_samples(ps_RA_nf, sampleID!="NG2A2_30to40")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(x),
MARGIN = ifelse(taxa_are_rows(x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
x = prune_taxa((prev0 > 0), x)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19817 taxa and 124 samples ]
## sample_data() Sample Data: [ 124 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 19817 taxa by 7 taxonomic ranks ]
otu <- abundances(x)
b <- phyloseq::distance(x, method = "bray")
final <- adonis2(formula = b ~ Porg_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = b ~ Porg_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## Porg_mgkg 1 6.645 0.14528 20.736 1e-04 ***
## Residual 122 39.095 0.85472
## Total 123 45.739 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m <- meta %>% drop_na(PH2O_mgkg)
x <- subset_samples(ps_RA_nf, sampleID!="NG2B3_0to10")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(x),
MARGIN = ifelse(taxa_are_rows(x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
x = prune_taxa((prev0 > 0), x)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19819 taxa and 124 samples ]
## sample_data() Sample Data: [ 124 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 19819 taxa by 7 taxonomic ranks ]
otu <- abundances(x)
b <- phyloseq::distance(x, method = "bray")
final <- adonis2(formula = b ~ PH2O_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = b ~ PH2O_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## PH2O_mgkg 1 1.935 0.04229 5.3866 1e-04 ***
## Residual 122 43.824 0.95771
## Total 123 45.759 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
remove these samples
Porg_mgkg (NG2A2_30to40) PH2O_mgkg (NG2B3_0to10)
m <- meta %>% drop_na(PH2O_mgkg)
m <- m %>% drop_na(Porg_mgkg)
ps_x <- subset_samples(ps_RA_nf, sampleID!="NG2B3_0to10")
ps_x <- subset_samples(ps_x, sampleID!="NG2A2_30to40")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19816 taxa and 123 samples ]
## sample_data() Sample Data: [ 123 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 19816 taxa by 7 taxonomic ranks ]
b <- phyloseq::distance(ps_x, method = "bray")
final <- adonis2(formula = b ~ log_root + pH_H2O + C_g_per_kg + N_gkg + TP_gkg + Alox_mmolkg + Feox_mmolkg + PH2O_mgkg + Porg_mgkg + DOC_mgkg + Pinorg_mgkg + C_per_N, data = m, permutations = 9999, method = "bray", by = NULL)
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = b ~ log_root + pH_H2O + C_g_per_kg + N_gkg + TP_gkg + Alox_mmolkg + Feox_mmolkg + PH2O_mgkg + Porg_mgkg + DOC_mgkg + Pinorg_mgkg + C_per_N, data = m, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 12 15.333 0.33811 4.6826 1e-04 ***
## Residual 110 30.016 0.66189
## Total 122 45.350 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
run for all layers and all of these separately:
# Define the environmental variables as a character vector, not as a factor
env <- c("log_root", "pH_H2O", "C_g_per_kg", "N_gkg", "TP_gkg", "Alox_mmolkg", "Feox_mmolkg",
"PH2O_mgkg", "Porg_mgkg", "DOC_mgkg", "Pinorg_mgkg", "C_per_N")
# Convert the 'depth' column to a factor
meta$depth <- as.factor(meta$depth)
# Initialize an empty list to store the results
adonis_results <- list()
# Loop over each depth and environmental variable
for (i in levels(meta$depth)) {
for (j in env) {
# Subset samples
ps_x <- subset_samples(ps_RA, sample_type != "forest")
ps_x <- subset_samples(ps_x, sampleID != "NG2B3_0to10")
ps_x <- subset_samples(ps_x, sampleID != "NG2A2_30to40")
ps_x <- subset_samples(ps_x, depth == i)
meta_subset <- meta(ps_x)
# Define prevalence of each taxa (in how many samples did each taxa appear at least once)
prev0 <- apply(X = otu_table(ps_x),
MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
FUN = function(x) { sum(x > 0) })
# Execute prevalence filter, using `prune_taxa()` function
ps_x <- prune_taxa((prev0 > 0), ps_x)
# Calculate Bray-Curtis distance
b <- phyloseq::distance(ps_x, method = "bray")
# Run adonis2 analysis
formula <- as.formula(paste("b ~", j))
adonis_result <- adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
# Store the result in the list with a descriptive name
result_name <- paste("depth", i, "env", j, sep = "_")
adonis_results[[result_name]] <- adonis_result
}
}
# View the list of results
adonis_results
## $depth_0...10_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7966 0.13653 3.4787 0.0014 **
## Residual 22 5.0376 0.86347
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.6628 0.1136 2.8195 0.0054 **
## Residual 22 5.1714 0.8864
## Total 23 5.8342 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.5213 0.26075 7.7599 1e-04 ***
## Residual 22 4.3129 0.73925
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.4712 0.25216 7.4181 1e-04 ***
## Residual 22 4.3630 0.74784
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2863 0.04908 1.1354 0.2882
## Residual 22 5.5479 0.95092
## Total 23 5.8342 1.00000
##
## $depth_0...10_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.0063 0.17248 4.5855 1e-04 ***
## Residual 22 4.8279 0.82752
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7390 0.12666 3.1908 0.0025 **
## Residual 22 5.0952 0.87334
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5390 0.09239 2.2396 0.0204 *
## Residual 22 5.2951 0.90761
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7905 0.13549 3.4479 9e-04 ***
## Residual 22 5.0437 0.86451
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.4018 0.24027 6.9575 1e-04 ***
## Residual 22 4.4324 0.75973
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3372 0.05779 1.3495 0.1689
## Residual 22 5.4970 0.94221
## Total 23 5.8342 1.00000
##
## $depth_0...10_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.1766 0.20168 5.5579 1e-04 ***
## Residual 22 4.6575 0.79832
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7280 0.14084 3.7704 0.0013 **
## Residual 23 4.4407 0.85916
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.9411 0.18209 5.1204 2e-04 ***
## Residual 23 4.2275 0.81791
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.2399 0.23989 7.2586 1e-04 ***
## Residual 23 3.9287 0.76011
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.9946 0.19242 5.4802 1e-04 ***
## Residual 23 4.1741 0.80758
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2530 0.04895 1.1838 0.243
## Residual 23 4.9156 0.95105
## Total 24 5.1686 1.00000
##
## $depth_10...20_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.2957 0.25069 7.6951 1e-04 ***
## Residual 23 3.8729 0.74931
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.1620 0.22483 6.6708 1e-04 ***
## Residual 23 4.0066 0.77517
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3596 0.06957 1.7196 0.0688 .
## Residual 23 4.8091 0.93043
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2818 0.05453 1.3265 0.1728
## Residual 23 4.8868 0.94547
## Total 24 5.1686 1.00000
##
## $depth_10...20_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.1131 0.21535 6.3126 1e-04 ***
## Residual 23 4.0555 0.78465
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3936 0.07615 1.8959 0.05 *
## Residual 23 4.7750 0.92385
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5862 0.11342 2.9425 0.005 **
## Residual 23 4.5824 0.88658
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7395 0.10346 2.6541 0.0029 **
## Residual 23 6.4086 0.89654
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.8450 0.11821 3.0833 2e-04 ***
## Residual 23 6.3032 0.88179
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7142 0.09991 2.5531 0.0049 **
## Residual 23 6.4340 0.90009
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.6962 0.09739 2.4816 0.0067 **
## Residual 23 6.4520 0.90261
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4651 0.06506 1.6005 0.0711 .
## Residual 23 6.6831 0.93494
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.6839 0.09567 2.4332 0.0059 **
## Residual 23 6.4643 0.90433
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7618 0.10658 2.7437 0.0021 **
## Residual 23 6.3863 0.89342
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4951 0.06927 1.7117 0.0493 *
## Residual 23 6.6530 0.93073
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.6443 0.09014 2.2786 0.0105 *
## Residual 23 6.5038 0.90986
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.6601 0.09235 2.3401 0.0085 **
## Residual 23 6.4881 0.90765
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3030 0.04239 1.0182 0.4027
## Residual 23 6.8451 0.95761
## Total 24 7.1482 1.00000
##
## $depth_20...30_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4538 0.06348 1.5591 0.0878 .
## Residual 23 6.6944 0.93652
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_30...40_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5268 0.07033 1.6643 0.0328 *
## Residual 22 6.9641 0.92967
## Total 23 7.4909 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_30...40_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4728 0.06311 1.482 0.0711 .
## Residual 22 7.0181 0.93689
## Total 23 7.4909 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_30...40_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2850 0.03804 0.87 0.5167
## Residual 22 7.2060 0.96196
## Total 23 7.4909 1.00000
##
## $depth_30...40_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2756 0.03678 0.8402 0.6539
## Residual 22 7.2154 0.96322
## Total 23 7.4909 1.00000
##
## $depth_30...40_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4086 0.05455 1.2693 0.1997
## Residual 22 7.0823 0.94545
## Total 23 7.4909 1.00000
##
## $depth_30...40_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5886 0.07858 1.8761 0.0193 *
## Residual 22 6.9023 0.92142
## Total 23 7.4909 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_30...40_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5944 0.07936 1.8963 0.018 *
## Residual 22 6.8965 0.92064
## Total 23 7.4909 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_30...40_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3054 0.04077 0.935 0.5159
## Residual 22 7.1855 0.95923
## Total 23 7.4909 1.00000
##
## $depth_30...40_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5900 0.07877 1.881 0.0144 *
## Residual 22 6.9009 0.92123
## Total 23 7.4909 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_30...40_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3720 0.04966 1.1495 0.3077
## Residual 22 7.1189 0.95034
## Total 23 7.4909 1.00000
##
## $depth_30...40_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2953 0.03942 0.9028 0.5857
## Residual 22 7.1956 0.96058
## Total 23 7.4909 1.00000
##
## $depth_30...40_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4801 0.06409 1.5066 0.1147
## Residual 22 7.0108 0.93591
## Total 23 7.4909 1.00000
##
## $depth_40..._env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4089 0.04044 0.9693 0.4857
## Residual 23 9.7019 0.95956
## Total 24 10.1108 1.00000
##
## $depth_40..._env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5118 0.05062 1.2263 0.1537
## Residual 23 9.5990 0.94938
## Total 24 10.1108 1.00000
##
## $depth_40..._env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4801 0.04748 1.1465 0.2979
## Residual 23 9.6307 0.95252
## Total 24 10.1108 1.00000
##
## $depth_40..._env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3803 0.03761 0.8989 0.6571
## Residual 23 9.7305 0.96239
## Total 24 10.1108 1.00000
##
## $depth_40..._env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3102 0.03068 0.7281 0.9339
## Residual 23 9.8005 0.96932
## Total 24 10.1108 1.00000
##
## $depth_40..._env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3621 0.03582 0.8544 0.7201
## Residual 23 9.7487 0.96418
## Total 24 10.1108 1.00000
##
## $depth_40..._env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.8108 0.08019 2.0052 0.0017 **
## Residual 23 9.3000 0.91981
## Total 24 10.1108 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_40..._env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4612 0.04561 1.0992 0.2755
## Residual 23 9.6496 0.95439
## Total 24 10.1108 1.00000
##
## $depth_40..._env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3897 0.03854 0.9221 0.5867
## Residual 23 9.7211 0.96146
## Total 24 10.1108 1.00000
##
## $depth_40..._env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3038 0.03005 0.7125 0.8952
## Residual 23 9.8070 0.96995
## Total 24 10.1108 1.00000
##
## $depth_40..._env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2886 0.02854 0.6757 0.9697
## Residual 23 9.8222 0.97146
## Total 24 10.1108 1.00000
##
## $depth_40..._env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5202 0.05145 1.2476 0.1383
## Residual 23 9.5906 0.94855
## Total 24 10.1108 1.00000
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata
ps_RA <- microbiome::transform(ps, "compositional")
ps_RA_nf <- subset_samples(ps_RA, sample_type!="forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19820 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 19820 taxa by 7 taxonomic ranks ]
meta_nf <- meta(ps_RA_nf)
I will do heatmap for fungal genera using only meadow, organic and conventional soils (without forest). I will use RA values For the final heatmap figure the RA values will be standardizes (z-transformed)
!! NOTE!! I will also include taxa that are not classified at genus level in the heatmap
ps_genus <- aggregate_rare(ps, level = 'genus', detection = 0/100, prevalence = 0/140)
ps_genus
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 943 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 943 taxa by 1 taxonomic ranks ]
ps_genus_RA <- microbiome::transform(ps_genus, "compositional")
ps_genus_RA_pruned <- subset_samples(ps_genus_RA, sample_type!="forest")
ps_genus_RA_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 943 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 943 taxa by 1 taxonomic ranks ]
# sample wise filtering according to most abund. genera
# Initialize an empty list to store the taxa
abund.taxa <- list()
for (i in meta_nf$sample_type) {
for (j in meta_nf$depth) {
x <- sample_names(sample_data(ps_genus_RA_pruned)[sample_data(ps_genus_RA_pruned)$sample_type == i & sample_data(ps_genus_RA_pruned)$depth == j,])
# Calculate taxa mean of the selected samples
top20 <- head(sort(rowMeans(otu_table(ps_genus_RA_pruned)[,x]), decreasing = TRUE), 20)
result_name <- paste("sample_type", i, "depth", j, sep = "_")
abund.taxa[[result_name]] <- top20
}
}
management_layer <- c(names(abund.taxa))
all_top20 <- c()
for (i in management_layer) {
top20 <- c(names(abund.taxa[[i]]))
all_top20 <- c(all_top20, top20)
}
all_top20_unique <- unique(all_top20)
length(all_top20_unique)
## [1] 96
all_top20_unique_df <- as.data.frame(all_top20_unique)
# first aggregate to genus
ps_RA_nf_genus <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0/140, prevalence = 0/100)
# filter by taxa name
ps_RA_nf_genus_pruned <- prune_taxa(all_top20_unique, ps_RA_nf_genus)
ps_RA_nf_genus_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
ps_genus_nf_HETAMAP <- ps_RA_nf_genus_pruned
meta_nf$sample_type_depth <- NA
meta_nf$sample_type_depth[meta_nf$depth_numerical==5 & meta_nf$sample_type=="meadow"]<-"0...10_meadow"
meta_nf$sample_type_depth[meta_nf$depth_numerical==5 & meta_nf$sample_type=="organic"]<-"0...10_organic"
meta_nf$sample_type_depth[meta_nf$depth_numerical==5 & meta_nf$sample_type=="conventional"]<-"0...10_conventional"
meta_nf$sample_type_depth[meta_nf$depth_numerical==15 & meta_nf$sample_type=="meadow"]<-"10...20_meadow"
meta_nf$sample_type_depth[meta_nf$depth_numerical==15 & meta_nf$sample_type=="organic"]<-"10...20_organic"
meta_nf$sample_type_depth[meta_nf$depth_numerical==15 & meta_nf$sample_type=="conventional"]<-"10...20_conventional"
meta_nf$sample_type_depth[meta_nf$depth_numerical==25 & meta_nf$sample_type=="meadow"]<-"20...30_meadow"
meta_nf$sample_type_depth[meta_nf$depth_numerical==25 & meta_nf$sample_type=="organic"]<-"20...30_organic"
meta_nf$sample_type_depth[meta_nf$depth_numerical==25 & meta_nf$sample_type=="conventional"]<-"20...30_conventional"
meta_nf$sample_type_depth[meta_nf$depth_numerical==35 & meta_nf$sample_type=="meadow"]<-"30...40_meadow"
meta_nf$sample_type_depth[meta_nf$depth_numerical==35 & meta_nf$sample_type=="organic"]<-"30...40_organic"
meta_nf$sample_type_depth[meta_nf$depth_numerical==35 & meta_nf$sample_type=="conventional"]<-"30...40_conventional"
meta_nf$sample_type_depth[meta_nf$depth_numerical==60 & meta_nf$sample_type=="meadow"]<-"40...80_meadow"
meta_nf$sample_type_depth[meta_nf$depth_numerical==60 & meta_nf$sample_type=="organic"]<-"40...80_organic"
meta_nf$sample_type_depth[meta_nf$depth_numerical==60 & meta_nf$sample_type=="conventional"]<-"40...80_conventional"
sample_type_depth <- unique(meta_nf$sample_type_depth)
sample_type_depth
## [1] "0...10_conventional" "10...20_conventional" "20...30_conventional"
## [4] "30...40_conventional" "40...80_conventional" "0...10_meadow"
## [7] "10...20_meadow" "20...30_meadow" "30...40_meadow"
## [10] "40...80_meadow" "0...10_organic" "10...20_organic"
## [13] "20...30_organic" "30...40_organic" "40...80_organic"
# add the new meta_nf to ps_RA_nf_genus_pruned
sample_data(ps_RA_nf_genus_pruned) <- sample_data(meta_nf)
lets test separately for the ones that make sense, for example, no point testing between organic 0…10 and conventional 10…20, but rather 0…10 for both soil types and then within organic between 0…10 and 10…20
NOTE! At the end of chunk I do p-value adjustment (“BH”) for all comparisons in the chunk
library("data.table")
library("rstatix")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="0...10_meadow" | sample_type_depth=="0...10_conventional" | sample_type_depth=="0...10_organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
# Make sample_type_depth into a factor
melt_df$sample_type_depth <- factor(melt_df$sample_type_depth)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = NULL)
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df1 <- rbindlist(pval.list)
pval.list_df1 <- as.data.frame(pval.list_df1)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df1 <- gather(pval.list_df1, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df1 <- adjust_pvalue(pval.list_df1, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="10...20_meadow" | sample_type_depth=="10...20_conventional" | sample_type_depth=="10...20_organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df2 <- rbindlist(pval.list)
pval.list_df2 <- as.data.frame(pval.list_df2)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df2 <- gather(pval.list_df2, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df2 <- adjust_pvalue(pval.list_df2, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="20...30_meadow" | sample_type_depth=="20...30_conventional" | sample_type_depth=="20...30_organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df3 <- rbindlist(pval.list)
pval.list_df3 <- as.data.frame(pval.list_df3)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df3 <- gather(pval.list_df3, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df3 <- adjust_pvalue(pval.list_df3, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="30...40_meadow" | sample_type_depth=="30...40_conventional" | sample_type_depth=="30...40_organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df4 <- rbindlist(pval.list)
pval.list_df4 <- as.data.frame(pval.list_df4)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df4 <- gather(pval.list_df4, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df4 <- adjust_pvalue(pval.list_df4, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="40...80_meadow" | sample_type_depth=="40...80_conventional" | sample_type_depth=="40...80_organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df5 <- rbindlist(pval.list)
pval.list_df5 <- as.data.frame(pval.list_df5)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df5 <- gather(pval.list_df5, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df5 <- adjust_pvalue(pval.list_df5, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="0...10_organic" | sample_type_depth=="10...20_organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 16 samples ]
## sample_data() Sample Data: [ 16 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df6 <- rbindlist(pval.list)
pval.list_df6 <- as.data.frame(pval.list_df6)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df6 <- gather(pval.list_df6, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df6 <- adjust_pvalue(pval.list_df6, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="10...20_organic" | sample_type_depth=="20...30_organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 16 samples ]
## sample_data() Sample Data: [ 16 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df7 <- rbindlist(pval.list)
pval.list_df7 <- as.data.frame(pval.list_df7)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df7 <- gather(pval.list_df7, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df7 <- adjust_pvalue(pval.list_df7, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="20...30_organic" | sample_type_depth=="30...40_organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 16 samples ]
## sample_data() Sample Data: [ 16 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df8 <- rbindlist(pval.list)
pval.list_df8 <- as.data.frame(pval.list_df8)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df8 <- gather(pval.list_df8, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df8 <- adjust_pvalue(pval.list_df8, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="30...40_organic" | sample_type_depth=="40...80_organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 16 samples ]
## sample_data() Sample Data: [ 16 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df9 <- rbindlist(pval.list)
pval.list_df9 <- as.data.frame(pval.list_df9)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df9 <- gather(pval.list_df9, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df9 <- adjust_pvalue(pval.list_df9, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="0...10_conventional" | sample_type_depth=="10...20_conventional")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 18 samples ]
## sample_data() Sample Data: [ 18 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df10 <- rbindlist(pval.list)
pval.list_df10 <- as.data.frame(pval.list_df10)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df10 <- gather(pval.list_df10, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df10 <- adjust_pvalue(pval.list_df10, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="10...20_conventional" | sample_type_depth=="20...30_conventional")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 18 samples ]
## sample_data() Sample Data: [ 18 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df11 <- rbindlist(pval.list)
pval.list_df11 <- as.data.frame(pval.list_df11)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df11 <- gather(pval.list_df11, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df11 <- adjust_pvalue(pval.list_df11, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="20...30_conventional" | sample_type_depth=="30...40_conventional")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 18 samples ]
## sample_data() Sample Data: [ 18 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df12 <- rbindlist(pval.list)
pval.list_df12 <- as.data.frame(pval.list_df12)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df12 <- gather(pval.list_df12, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df12 <- adjust_pvalue(pval.list_df12, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="30...40_conventional" | sample_type_depth=="40...80_conventional")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 18 samples ]
## sample_data() Sample Data: [ 18 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df13 <- rbindlist(pval.list)
pval.list_df13 <- as.data.frame(pval.list_df13)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df13 <- gather(pval.list_df13, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df13 <- adjust_pvalue(pval.list_df13, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="0...10_meadow" | sample_type_depth=="10...20_meadow")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 16 samples ]
## sample_data() Sample Data: [ 16 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df14 <- rbindlist(pval.list)
pval.list_df14 <- as.data.frame(pval.list_df14)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df14 <- gather(pval.list_df14, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df14 <- adjust_pvalue(pval.list_df14, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="10...20_meadow" | sample_type_depth=="20...30_meadow")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 16 samples ]
## sample_data() Sample Data: [ 16 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df15 <- rbindlist(pval.list)
pval.list_df15 <- as.data.frame(pval.list_df15)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df15 <- gather(pval.list_df15, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df15 <- adjust_pvalue(pval.list_df15, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="20...30_meadow" | sample_type_depth=="30...40_meadow")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 16 samples ]
## sample_data() Sample Data: [ 16 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df16 <- rbindlist(pval.list)
pval.list_df16 <- as.data.frame(pval.list_df16)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df16 <- gather(pval.list_df16, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df16 <- adjust_pvalue(pval.list_df16, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="30...40_meadow" | sample_type_depth=="40...80_meadow")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 16 samples ]
## sample_data() Sample Data: [ 16 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df17 <- rbindlist(pval.list)
pval.list_df17 <- as.data.frame(pval.list_df17)
# make into long format: where the new column called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df17 <- gather(pval.list_df17, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df17 <- adjust_pvalue(pval.list_df17, p.col = "p_value", output.col = "adj_p", method = "BH")
all.pvals <- rbind(pval.list_df1, pval.list_df2, pval.list_df3, pval.list_df4, pval.list_df5, pval.list_df6, pval.list_df7, pval.list_df8, pval.list_df9, pval.list_df10, pval.list_df11, pval.list_df12, pval.list_df13, pval.list_df14, pval.list_df14, pval.list_df16, pval.list_df17)
all.sig.pvals <- all.pvals[all.pvals$adj_p <= 0.05, ]
all.sig.genus <- unique(all.sig.pvals$genus)
length(all.sig.genus)
## [1] 70
# only keep sig
# filter by taxa name
ps_RA_nf_genus_pruned_sig <- prune_taxa(all.sig.genus, ps_RA_nf_genus_pruned)
ps_Heatmap <- ps_RA_nf_genus_pruned_sig
ps_Heatmap
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 69 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 69 taxa by 1 taxonomic ranks ]
rm(ps_RA_nf_genus_pruned_sig)
df <- psmelt(ps_Heatmap)
x <- df %>%
group_by(OTU, sample_type, depth) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 1,035 × 5
## # Groups: OTU, sample_type [207]
## OTU sample_type depth mean se
## <chr> <fct> <chr> <dbl> <dbl>
## 1 Acremonium meadow 0...10 0.00996 0.00774
## 2 Acremonium meadow 10...20 0.00372 0.000836
## 3 Acremonium meadow 20...30 0.00656 0.00243
## 4 Acremonium meadow 30...40 0.00759 0.00494
## 5 Acremonium meadow 40... 0.000577 0.000564
## 6 Acremonium organic 0...10 0.00551 0.00103
## 7 Acremonium organic 10...20 0.0158 0.00834
## 8 Acremonium organic 20...30 0.0157 0.00325
## 9 Acremonium organic 30...40 0.0105 0.00452
## 10 Acremonium organic 40... 0.0713 0.0538
## # ℹ 1,025 more rows
I need to make a separate funguild phyloseq for heatmap, where I have only the genus and higher level annotation (no species level)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
FG <- read.csv2("FUNGuild_31_05_2024.csv")
# what different levels there are:
unique(FG$taxonomicLevel)
## [1] "Genus" "Species" "Variety" "Family" "Order"
## [6] "Phylum" "Form" "Subspecies"
I will get the annotations from genus and higher tax levels:
Genus
fg <- FG[FG$taxonomicLevel == "Genus", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "genus"
FG_tax_table <- merge(tax_table, fg, by = "genus", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
# and OTU as row names
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_gen"
colnames(FG_tax_table)[2] <- "guild_gen"
# save with new name
FUNGuild_gen <- FG_tax_table
Family
fg <- FG[FG$taxonomicLevel == "Family", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "family"
FG_tax_table <- merge(tax_table, fg, by = "family", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_fam"
colnames(FG_tax_table)[2] <- "guild_fam"
# save with new name
FUNGuild_fam <- FG_tax_table
Order
fg <- FG[FG$taxonomicLevel == "Order", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "order"
FG_tax_table <- merge(tax_table, fg, by = "order", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_ord"
colnames(FG_tax_table)[2] <- "guild_ord"
# save with new name
FUNGuild_ord <- FG_tax_table
Phylum
fg <- FG[FG$taxonomicLevel == "Phylum", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "phylum"
FG_tax_table <- merge(tax_table, fg, by = "phylum", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_phy"
colnames(FG_tax_table)[2] <- "guild_phy"
# save with new name
FUNGuild_phy <- FG_tax_table
Combine all annotations:Genus Family Order Phylum
x <- left_join(rownames_to_column(FUNGuild_gen), rownames_to_column(FUNGuild_fam), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_ord), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_phy), by = "rowname")
# get the value from another column if NA
y <- x %>%
mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_fam))
# get the value from another column if NA
y <- y %>%
mutate(guild_gen = coalesce(guild_gen,guild_fam))
# same for order
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_ord))
# get the value from another column if NA
y <- y %>%
mutate(guild_gen = coalesce(guild_gen,guild_ord))
# same for phylum
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_phy))
# get the value from another column if NA
y <- y %>%
mutate(guild_gen = coalesce(guild_gen,guild_phy))
# then rename the gen columns
colnames(y)[2] <- "trophicMode"
colnames(y)[3] <- "guild"
# remove the rest of the columns
y <- y[, -c(4:11)]
# rownames
y2 <- y[,-1]
rownames(y2) <- y[,1]
# Remove empty spaces and remove also "|"
y2$trophicMode <- gsub(" ", "", y2$trophicMode, fixed = TRUE)
y2$guild <- gsub("|", "", y2$guild, fixed = TRUE)
y3 <- left_join(rownames_to_column(y2), rownames_to_column(tax_table), by = "rowname")
row.names(y3) <- y3$rowname
y3 <- y3[, -1]
Define AMFs, Ectomycorrhizal and Plant pathogens
Here in FUNGuild column:
z <- y3
z <- z %>%
mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
z <- z %>%
mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" | FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" | FG != "Arbuscular Mycorrhizal" | FG != "Endophyte" | FG != "Plant Pathogen" ~ z$trophicMode))
FG_tax_table <- z
# remove the FG column
FG_tax_table <- FG_tax_table[, -c(11)]
Check the different written forms, if I have empty spaces?
unique(FG_tax_table$trophicMode)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Pathotroph" "Symbiotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [9] "Saprotroph-Saprotroph-Symbiotroph" "Pathotroph-Pathotroph-Saprotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Plant Pathogen" "Ectomycorrhizal"
## [7] "Arbuscular Mycorrhizal" "Pathotroph"
## [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [11] "Symbiotroph" "Saprotroph-Saprotroph-Symbiotroph"
## [13] "Endophyte" "Pathotroph-Pathotroph-Saprotroph"
# change some names for FUNGuild
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"
# and for trophicMode
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"
# reorder
FG_tax_table <- FG_tax_table[, c(1:2, 11, 3:10)]
Check again
unique(FG_tax_table$trophicMode)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Pathotroph" "Symbiotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Plant Pathogen" "Ectomycorrhizal"
## [7] "Arbuscular Mycorrhizal" "Other Pathotroph"
## [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [11] "Other Symbiotroph" "Endophyte"
ps_FG_HEATMAP_no_clusters <- phyloseq(otu_table(ps), tax_table(as.matrix(FG_tax_table)), sample_data(ps))
ps_FG_HEATMAP_no_clusters
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
ps_RA <- transform(ps, "compositional")
# Pick relative abundances (compositional) and sample metadata
ps_RA_nf <- subset_samples(ps_RA, sample_type!="forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19820 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 19820 taxa by 7 taxonomic ranks ]
all.sig.genus <- unique(all.sig.pvals$genus)
length(all.sig.genus)
## [1] 70
# first aggregate to genus
ps_RA_nf_genus <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0/140, prevalence = 0/100)
all_top20_unique <- all_top20_unique_df$all_top20_unique
# filter by taxa name
ps_RA_nf_genus_pruned <- prune_taxa(all_top20_unique, ps_RA_nf_genus)
ps_RA_nf_genus_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 96 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 96 taxa by 1 taxonomic ranks ]
# filter by taxa name
ps_RA_nf_genus_pruned_sig <- prune_taxa(all.sig.genus, ps_RA_nf_genus_pruned)
ps_Heatmap <- ps_RA_nf_genus_pruned_sig
ps_Heatmap
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 69 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 69 taxa by 1 taxonomic ranks ]
FG_tax <- as.data.frame(as.matrix(tax_table(ps_FG_HEATMAP_no_clusters)))
ps_genus <- aggregate_rare(ps, level = 'genus', detection = 0/100, prevalence = 0/100)
ps_genus
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 943 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 943 taxa by 1 taxonomic ranks ]
# subset
ps_genus_NO_FOREST <- subset_samples(ps_genus, sample_type!="forest")
ps_genus_NO_FOREST
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 943 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 943 taxa by 1 taxonomic ranks ]
meta_nf <- meta(ps_genus_NO_FOREST)
meta_nf$Depth <- gsub("...", "-", meta_nf$depth, fixed = TRUE)
meta_nf$Depth[meta_nf$Depth=="40-"] <- "40-80"
meta_nf$Depth[meta_nf$Depth=="0-10"] <- "0-10 cm"
meta_nf$Depth[meta_nf$Depth=="10-20"] <- "10-20 cm"
meta_nf$Depth[meta_nf$Depth=="20-30"] <- "20-30 cm"
meta_nf$Depth[meta_nf$Depth=="30-40"] <- "30-40 cm"
meta_nf$Depth[meta_nf$Depth=="40-80"] <- "40-80 cm"
# RA-transform
ps_genus_NO_FOREST_RA <- microbiome::transform(ps_genus_NO_FOREST, 'compositional')
# Z-transform
ps_genus_NO_FOREST_RA_z <- microbiome::transform(ps_genus_NO_FOREST_RA, 'Z', log10 = FALSE)
# subset to only the significant genus from above
hetamap.taxa <- taxa_names(ps_Heatmap)
data <- as(otu_table(ps_genus_NO_FOREST_RA_z), "matrix")
data <- as.data.frame(data)
data_subset <- data[(row.names(data) %in% hetamap.taxa),]
data_subset <- as.matrix(data_subset)
# add annotations "Depth" and "Treatment"
my_sample_col <- data.frame(meta_nf[c("Depth", "sample_type")], row.names = row.names(meta_nf))
colnames(my_sample_col) <- c("Depth", "Treatment")
x <- as.data.frame(t(data_subset))
y <- as.data.frame(my_sample_col)
colnames(y) <- c("Depth", "sample_type")
z <- dplyr::left_join(rownames_to_column(x), rownames_to_column(y), by=c("rowname" = "rowname"))
colnames(z)[1] <- "ID"
f <- z %>%
group_by(sample_type, Depth) %>%
summarise_all(mean)
# remove ID column
f <- f[ , -3]
# make new ID column, which is the soiltypedepth
library(stringr)
f$ID <- str_c(f$sample_type, '_', f$Depth)
# make new df with just sample type, depth and sampletypedepth
df <- f[c("sample_type", "Depth", "ID")]
# remove extra columns from f
f2 <- f[, -c(1, 2, 72)]
# column into rownames
rownames(f2) <- f$ID
# make into numeric matrix
f3 <- data.matrix(f2, rownames.force = NA)
f4 <- t(f3)
# same for df
# remove extra columns from df
df2 <- df[ , -c(3)]
# column into rownames
rownames(df2) <- df$ID
my_sample_col2 <- df2
my_sample_col3 <- as.data.frame(my_sample_col2)
rownames(my_sample_col3) <- df$ID
# lets add annotations of samples
# add annotations "Depth" and "soil_type" and change order
colnames(my_sample_col3) <- c("Treatment", "Depth")
my_sample_col3 <- my_sample_col3[, c(2,1)]
library("pheatmap")
library("ggplotify")
# only keep the genus and FUNGuild
x <- FG_tax[, c(9, 3)]
# Remove duplicates by single column
FUNGuild_tax_table <- x[!duplicated(x$genus), ]
dim(FUNGuild_tax_table)
## [1] 943 2
# and genus as row names
rownames(FUNGuild_tax_table) <- NULL
FUNGuild_tax_table <- column_to_rownames(FUNGuild_tax_table, var = "genus")
# view data frame
unique(FUNGuild_tax_table$FUNGuild)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Arbuscular Mycorrhizal" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Saprotroph"
## [7] "Endophyte" "Ectomycorrhizal"
## [9] "Other Pathotroph" "Plant Pathogen"
## [11] "Other Symbiotroph" "Pathotroph-Symbiotroph"
FUNGuild_tax_table$FUNGuild <- as.factor(FUNGuild_tax_table$FUNGuild)
# change level order
FUNGuild_tax_table$FUNGuild <- factor(FUNGuild_tax_table$FUNGuild, levels = c("Plant Pathogen", "Other Pathotroph", "Pathotroph-Saprotroph", "Pathotroph-Saprotroph-Symbiotroph", "Pathotroph-Symbiotroph", "Saprotroph", "Saprotroph-Symbiotroph", "Other Symbiotroph", "Ectomycorrhizal", "Endophyte", "Arbuscular Mycorrhizal"))
levels(FUNGuild_tax_table$FUNGuild)
## [1] "Plant Pathogen" "Other Pathotroph"
## [3] "Pathotroph-Saprotroph" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Symbiotroph" "Saprotroph"
## [7] "Saprotroph-Symbiotroph" "Other Symbiotroph"
## [9] "Ectomycorrhizal" "Endophyte"
## [11] "Arbuscular Mycorrhizal"
#Create color palette
my_colour = list(
"Treatment" = c(meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c"),
Depth = c('0-10 cm' = "#387212", '10-20 cm' = "#ADC476", '20-30 cm' = "#D8D2BA",'30-40 cm' = "#907852", '40-80 cm' = "#6A4C3A"),
FUNGuild = c("Plant Pathogen" = "deeppink", "Other Pathotroph" = "#d6849a", "Pathotroph-Saprotroph" = "#e3adbc", "Pathotroph-Saprotroph-Symbiotroph" = "#f1d6dd", "Pathotroph-Symbiotroph" = "#faf1f4", "Saprotroph" = "#CBBEAD", "Saprotroph-Symbiotroph" = "darkseagreen", "Other Symbiotroph" = "lightgreen", "Ectomycorrhizal" = "darkgreen", "Endophyte" = "#A2CF31", "Arbuscular Mycorrhizal" = "darkolivegreen1"))
# add gap between sample_types
gp_col = which(diff(as.numeric(factor(my_sample_col3$Treatment)))!=0)
p2 <- as.ggplot(function() pheatmap(f4, cluster_cols = FALSE, cluster_rows = TRUE, annotation_col = my_sample_col3, annotation_colors = my_colour, gaps_col = gp_col, color=colorRampPalette(c("navy", "white", "red"))(50), show_colnames = FALSE, legend = TRUE, annotation_row = FUNGuild_tax_table, border_color = NA, cellheight = 16, fontsize = 14, fontsize_row = 14, annotation_names_row = FALSE, annotation_names_col = FALSE))
p2
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata
ps_RA <- microbiome::transform(ps, "compositional")
ps_RA_nf <- subset_samples(ps_RA, sample_type=="forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 5398 taxa and 15 samples ]
## sample_data() Sample Data: [ 15 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 5398 taxa by 7 taxonomic ranks ]
meta_nf <- meta(ps_RA_nf)
I will use RA values For the final heatmap figure the RA values will be standardizes (z-transformed)
ps_genus <- aggregate_rare(ps, level = 'genus', detection = 0/100, prevalence = 0/140)
ps_genus
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 943 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 943 taxa by 1 taxonomic ranks ]
ps_genus_RA <- microbiome::transform(ps_genus, "compositional")
ps_genus_RA_pruned <- subset_samples(ps_genus_RA, sample_type=="forest")
ps_genus_RA_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 943 taxa and 15 samples ]
## sample_data() Sample Data: [ 15 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 943 taxa by 1 taxonomic ranks ]
# sample wise filtering according to most abund. genera
# Initialize an empty list to store the taxa
abund.taxa <- list()
for (i in meta_nf$sample_type) {
for (j in meta_nf$depth) {
x <- sample_names(sample_data(ps_genus_RA_pruned)[sample_data(ps_genus_RA_pruned)$sample_type == i & sample_data(ps_genus_RA_pruned)$depth == j,])
# Calculate taxa mean of the selected samples
top10 <- head(sort(rowMeans(otu_table(ps_genus_RA_pruned)[,x]), decreasing = TRUE), 10)
result_name <- paste("sample_type", i, "depth", j, sep = "_")
abund.taxa[[result_name]] <- top10
}
}
management_layer <- c(names(abund.taxa))
all_top10 <- c()
for (i in management_layer) {
top10 <- c(names(abund.taxa[[i]]))
all_top10 <- c(all_top10, top10)
}
all_top10_unique <- unique(all_top10)
length(all_top10_unique)
## [1] 31
# first aggregate to genus
ps_RA_nf_genus <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0/140, prevalence = 0/100)
# filter by taxa name
ps_RA_nf_genus_pruned <- prune_taxa(all_top10_unique, ps_RA_nf_genus)
ps_RA_nf_genus_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 31 taxa and 15 samples ]
## sample_data() Sample Data: [ 15 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 31 taxa by 1 taxonomic ranks ]
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
ps_genus_FOREST_ONLY_HETAMAP <- ps_RA_nf_genus_pruned
#save(ps_genus_FOREST_ONLY_HETAMAP, file='ps_genus_nf_HETAMAP_all_top10_FOREST_ONLY_unclassified_included')
ps_Heatmap <- ps_genus_FOREST_ONLY_HETAMAP
ps_Heatmap
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 31 taxa and 15 samples ]
## sample_data() Sample Data: [ 15 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 31 taxa by 1 taxonomic ranks ]
df <- psmelt(ps_Heatmap)
x <- df %>%
group_by(OTU, sample_type, depth) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 155 × 5
## # Groups: OTU, sample_type [31]
## OTU sample_type depth mean se
## <chr> <fct> <chr> <dbl> <dbl>
## 1 Ascomycota_unclassified forest 0...10 0.0204 0.00318
## 2 Ascomycota_unclassified forest 10...20 0.0206 0.00656
## 3 Ascomycota_unclassified forest 20...30 0.0245 0.0166
## 4 Ascomycota_unclassified forest 30...40 0.00872 0.00634
## 5 Ascomycota_unclassified forest 40... 0.0458 0.0458
## 6 Botrytis forest 0...10 0.0000325 0.0000325
## 7 Botrytis forest 10...20 0.0000800 0.0000800
## 8 Botrytis forest 20...30 0 0
## 9 Botrytis forest 30...40 0 0
## 10 Botrytis forest 40... 0.163 0.163
## # ℹ 145 more rows
I need to make a separate funguild phyloseq for heatmap, where I have only the genus and higher level annotation (no species level)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
FG <- read.csv2("FUNGuild_31_05_2024.csv")
# what different levels there are:
unique(FG$taxonomicLevel)
## [1] "Genus" "Species" "Variety" "Family" "Order"
## [6] "Phylum" "Form" "Subspecies"
I will get the annotations from genus and higher tax levels:
Genus
fg <- FG[FG$taxonomicLevel == "Genus", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "genus"
FG_tax_table <- merge(tax_table, fg, by = "genus", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
# and OTU as row names
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_gen"
colnames(FG_tax_table)[2] <- "guild_gen"
# save with new name
FUNGuild_gen <- FG_tax_table
Family
fg <- FG[FG$taxonomicLevel == "Family", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "family"
FG_tax_table <- merge(tax_table, fg, by = "family", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_fam"
colnames(FG_tax_table)[2] <- "guild_fam"
# save with new name
FUNGuild_fam <- FG_tax_table
Order
fg <- FG[FG$taxonomicLevel == "Order", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "order"
FG_tax_table <- merge(tax_table, fg, by = "order", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_ord"
colnames(FG_tax_table)[2] <- "guild_ord"
# save with new name
FUNGuild_ord <- FG_tax_table
Phylum
fg <- FG[FG$taxonomicLevel == "Phylum", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "phylum"
FG_tax_table <- merge(tax_table, fg, by = "phylum", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_phy"
colnames(FG_tax_table)[2] <- "guild_phy"
# save with new name
FUNGuild_phy <- FG_tax_table
Combine all annotations:Genus Family Order Phylum
x <- left_join(rownames_to_column(FUNGuild_gen), rownames_to_column(FUNGuild_fam), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_ord), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_phy), by = "rowname")
# get the value from another column if NA
y <- x %>%
mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_fam))
# get the value from another column if NA
y <- y %>%
mutate(guild_gen = coalesce(guild_gen,guild_fam))
# same for order
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_ord))
# get the value from another column if NA
y <- y %>%
mutate(guild_gen = coalesce(guild_gen,guild_ord))
# same for phylum
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_phy))
# get the value from another column if NA
y <- y %>%
mutate(guild_gen = coalesce(guild_gen,guild_phy))
# then rename the gen columns
colnames(y)[2] <- "trophicMode"
colnames(y)[3] <- "guild"
# remove the rest of the columns
y <- y[, -c(4:11)]
# rownames
y2 <- y[,-1]
rownames(y2) <- y[,1]
# Remove empty spaces and remove also "|"
y2$trophicMode <- gsub(" ", "", y2$trophicMode, fixed = TRUE)
y2$guild <- gsub("|", "", y2$guild, fixed = TRUE)
y3 <- left_join(rownames_to_column(y2), rownames_to_column(tax_table), by = "rowname")
row.names(y3) <- y3$rowname
y3 <- y3[, -1]
Define AMFs, Ectomycorrhizal and Plant pathogens
Here in FUNGuild column:
z <- y3
z <- z %>%
mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
z <- z %>%
mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" | FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" | FG != "Arbuscular Mycorrhizal" | FG != "Endophyte" | FG != "Plant Pathogen" ~ z$trophicMode))
FG_tax_table <- z
# remove the FG column
FG_tax_table <- FG_tax_table[, -c(11)]
Check the different written forms, if I have empty spaces?
unique(FG_tax_table$trophicMode)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Pathotroph" "Symbiotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [9] "Saprotroph-Saprotroph-Symbiotroph" "Pathotroph-Pathotroph-Saprotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Plant Pathogen" "Ectomycorrhizal"
## [7] "Arbuscular Mycorrhizal" "Pathotroph"
## [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [11] "Symbiotroph" "Saprotroph-Saprotroph-Symbiotroph"
## [13] "Endophyte" "Pathotroph-Pathotroph-Saprotroph"
# change some names for FUNGuild
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"
# and for trophicMode
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"
# reorder
FG_tax_table <- FG_tax_table[, c(1:2, 11, 3:10)]
Check again
unique(FG_tax_table$trophicMode)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Pathotroph" "Symbiotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Plant Pathogen" "Ectomycorrhizal"
## [7] "Arbuscular Mycorrhizal" "Other Pathotroph"
## [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [11] "Other Symbiotroph" "Endophyte"
ps_FG_HEATMAP_no_clusters <- phyloseq(otu_table(ps), tax_table(as.matrix(FG_tax_table)), sample_data(ps))
ps_FG_HEATMAP_no_clusters
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
FG_tax <- as.data.frame(as.matrix(tax_table(ps_FG_HEATMAP_no_clusters)))
ps_genus <- aggregate_rare(ps, level = 'genus', detection = 0/100, prevalence = 0/100)
ps_genus
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 943 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 943 taxa by 1 taxonomic ranks ]
# subset
ps_genus_NO_FOREST <- subset_samples(ps_genus, sample_type=="forest")
ps_genus_NO_FOREST
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 943 taxa and 15 samples ]
## sample_data() Sample Data: [ 15 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 943 taxa by 1 taxonomic ranks ]
meta_nf <- meta(ps_genus_NO_FOREST)
meta_nf$Depth <- gsub("...", "-", meta_nf$depth, fixed = TRUE)
meta_nf$Depth[meta_nf$Depth=="40-"] <- "40-80"
meta_nf$Depth[meta_nf$Depth=="0-10"] <- "0-10 cm"
meta_nf$Depth[meta_nf$Depth=="10-20"] <- "10-20 cm"
meta_nf$Depth[meta_nf$Depth=="20-30"] <- "20-30 cm"
meta_nf$Depth[meta_nf$Depth=="30-40"] <- "30-40 cm"
meta_nf$Depth[meta_nf$Depth=="40-80"] <- "40-80 cm"
# RA-transform
ps_genus_NO_FOREST_RA <- microbiome::transform(ps_genus_NO_FOREST, 'compositional')
# Z-transform
ps_genus_NO_FOREST_RA_z <- microbiome::transform(ps_genus_NO_FOREST_RA, 'Z', log10 = FALSE)
# subset to only the 30 genera from above
hetamap.taxa <- taxa_names(ps_Heatmap)
data <- as(otu_table(ps_genus_NO_FOREST_RA_z), "matrix")
data <- as.data.frame(data)
data_subset <- data[(row.names(data) %in% hetamap.taxa),]
# now only 30 genuses, as should be
data_subset <- as.matrix(data_subset)
# add annotations "Depth" and "Treatment"
my_sample_col <- data.frame(meta_nf[c("Depth", "sample_type")], row.names = row.names(meta_nf))
colnames(my_sample_col) <- c("Depth", "Treatment")
x <- as.data.frame(t(data_subset))
y <- as.data.frame(my_sample_col)
colnames(y) <- c("Depth", "sample_type")
z <- dplyr::left_join(rownames_to_column(x), rownames_to_column(y), by=c("rowname" = "rowname"))
colnames(z)[1] <- "ID"
f <- z %>%
group_by(sample_type, Depth) %>%
summarise_all(mean)
# remove ID column
f <- f[ , -3]
# make new ID column, which is the soiltypedepth
library(stringr)
f$ID <- str_c(f$sample_type, '_', f$Depth)
# make new df with just sample type, Depth and sampletypedepth
df <- f[c("sample_type", "Depth", "ID")]
# remove extra columns from f
f2 <- f[, -c(1, 2, 34)]
# column into rownames
rownames(f2) <- f$ID
# make into numeric matrix
f3 <- data.matrix(f2, rownames.force = NA)
f4 <- t(f3)
# same for df
# remove extra columns from df
df2 <- df[ , -c(3)]
# column into rownames
rownames(df2) <- df$ID
my_sample_col2 <- df2
my_sample_col3 <- as.data.frame(my_sample_col2)
rownames(my_sample_col3) <- df$ID
# lets add annotations of samples
# add annotations "Depth" and "soil_type" and change order
colnames(my_sample_col3) <- c("Treatment", "Depth")
my_sample_col3 <- my_sample_col3[, c(2,1)]
library("pheatmap")
# only keep the genus and FUNGuild
x <- FG_tax[, c(9, 3)]
# Remove duplicates by single column
FUNGuild_tax_table <- x[!duplicated(x$genus), ]
dim(FUNGuild_tax_table)
## [1] 943 2
# and genus as row names
rownames(FUNGuild_tax_table) <- NULL
FUNGuild_tax_table <- column_to_rownames(FUNGuild_tax_table, var = "genus")
# view data frame
unique(FUNGuild_tax_table$FUNGuild)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Arbuscular Mycorrhizal" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Saprotroph"
## [7] "Endophyte" "Ectomycorrhizal"
## [9] "Other Pathotroph" "Plant Pathogen"
## [11] "Other Symbiotroph" "Pathotroph-Symbiotroph"
FUNGuild_tax_table$FUNGuild <- as.factor(FUNGuild_tax_table$FUNGuild)
# change level order
FUNGuild_tax_table$FUNGuild <- factor(FUNGuild_tax_table$FUNGuild, levels = c("Plant Pathogen", "Other Pathotroph", "Pathotroph-Saprotroph", "Pathotroph-Saprotroph-Symbiotroph", "Pathotroph-Symbiotroph", "Saprotroph", "Saprotroph-Symbiotroph", "Other Symbiotroph", "Ectomycorrhizal", "Endophyte", "Arbuscular Mycorrhizal"))
levels(FUNGuild_tax_table$FUNGuild)
## [1] "Plant Pathogen" "Other Pathotroph"
## [3] "Pathotroph-Saprotroph" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Symbiotroph" "Saprotroph"
## [7] "Saprotroph-Symbiotroph" "Other Symbiotroph"
## [9] "Ectomycorrhizal" "Endophyte"
## [11] "Arbuscular Mycorrhizal"
#Create color palette
my_colour = list(
"Treatment" = c(forest = "#1167b1"),
Depth = c('0-10 cm' = "#387212", '10-20 cm' = "#ADC476", '20-30 cm' = "#D8D2BA",'30-40 cm' = "#907852", '40-80 cm' = "#6A4C3A"),
FUNGuild = c("Plant Pathogen" = "deeppink", "Other Pathotroph" = "#d6849a", "Pathotroph-Saprotroph" = "#e3adbc", "Pathotroph-Saprotroph-Symbiotroph" = "#f1d6dd", "Pathotroph-Symbiotroph" = "#faf1f4", "Saprotroph" = "#CBBEAD", "Saprotroph-Symbiotroph" = "darkseagreen", "Other Symbiotroph" = "lightgreen", "Ectomycorrhizal" = "darkgreen", "Endophyte" = "#A2CF31", "Arbuscular Mycorrhizal" = "darkolivegreen1"))
p3 <- as.ggplot(function() pheatmap(f4, cluster_cols = FALSE, cluster_rows = TRUE, annotation_col = my_sample_col3, annotation_colors = my_colour, color=colorRampPalette(c("navy", "white", "red"))(50), show_colnames = FALSE, legend = TRUE, annotation_row = FUNGuild_tax_table, border_color = NA, cellheight = 16, fontsize = 14, fontsize_row = 14, annotation_names_row = FALSE, annotation_names_col = FALSE))
p3
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library(metagMisc)
library(pheatmap)
library(metagMisc)
library(RColorBrewer)
library(viridis)
library(tidyverse)
library(ggpubr)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata
ps_RA <- microbiome::transform(ps, "compositional")
load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
Note, due to reviewers comment, I will redo the Functional guild composition plot so that Unassigned are kept!!!
FG_tax <- ps_FG %>% tax_table() %>% as.data.frame()
unique(FG_tax$trophicMode)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Symbiotroph" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Pathotroph"
## [7] "Saprotroph" "Pathotroph-Symbiotroph"
Note! In the composition figure:
z <- FG_tax
z <- z %>%
mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
z <- z %>%
mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" | FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" | FG != "Arbuscular Mycorrhizal" | FG != "Endophyte" | FG != "Plant Pathogen" ~ z$trophicMode))
# change some names for FUNGuild
z$FUNGuild[z$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
z$FUNGuild[z$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
# remove species and FG
tax <- z[, -c(10, 12)]
unique(tax$FUNGuild)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Arbuscular Mycorrhizal" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Other Pathotroph"
## [7] "Saprotroph" "Endophyte"
## [9] "Ectomycorrhizal" "Pathotroph-Symbiotroph"
## [11] "Plant Pathogen" "Other Symbiotroph"
# rename FUNGuild to species
colnames(tax)[3] <- "species"
tax <- tax %>% as.matrix()
# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)
x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 12 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 12 taxa by 2 taxonomic ranks ]
# 12 taxa and 140 samples
# lets not remove NAs!!
# remove "Unknown"
#allTaxa = taxa_names(x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#x <- prune_taxa(myTaxa, x)
#x
# would be 11 taxa and 140 samples
x_RA <- transform(x, 'compositional')
#create data table
df <- psmelt(x_RA)
sampletype_names <- list(
'forest' = "forest", 'meadow' = "meadow", 'organic' = "organic", 'conventional' = "conventional"
)
sampletype_labeller <- function(variable,value){
return(sampletype_names[value])
}
df$species <- factor(df$species)
levels(df$species)
## [1] "Arbuscular Mycorrhizal" "Ectomycorrhizal"
## [3] "Endophyte" "Other Pathotroph"
## [5] "Other Symbiotroph" "Pathotroph-Saprotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [9] "Plant Pathogen" "Saprotroph"
## [11] "Saprotroph-Symbiotroph" "Unknown"
# change level order
df$species <- factor(df$species, levels = c("Unknown","Plant Pathogen", "Other Pathotroph", "Pathotroph-Saprotroph", "Pathotroph-Saprotroph-Symbiotroph", "Pathotroph-Symbiotroph", "Saprotroph", "Saprotroph-Symbiotroph", "Other Symbiotroph", "Ectomycorrhizal", "Endophyte", "Arbuscular Mycorrhizal"))
levels(df$species)
## [1] "Unknown" "Plant Pathogen"
## [3] "Other Pathotroph" "Pathotroph-Saprotroph"
## [5] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [7] "Saprotroph" "Saprotroph-Symbiotroph"
## [9] "Other Symbiotroph" "Ectomycorrhizal"
## [11] "Endophyte" "Arbuscular Mycorrhizal"
#Create color palette
my_colour = list(
"sample" = c(meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c"),
depth = c('0...10' = "#387212", '10...20' = "#ADC476", '20...30' = "#D8D2BA",'30...40' = "#907852", '40...' = "#6A4C3A"),
FUNGuild = c("Unknown" = "grey", "Plant Pathogen" = "deeppink", "Other Pathotroph" = "#d6849a", "Pathotroph-Saprotroph" = "#e3adbc", "Pathotroph-Saprotroph-Symbiotroph" = "#f1d6dd", "Pathotroph-Symbiotroph" = "#faf1f4", "Saprotroph" = "#CBBEAD", "Saprotroph-Symbiotroph" = "darkseagreen", "Other Symbiotroph" = "lightgreen", "Ectomycorrhizal" = "darkgreen", "Endophyte" = "#A2CF31", "Arbuscular Mycorrhizal" = "darkolivegreen1"))
# Make new depth variable
df$Depth <- df$depth
df$Depth <- gsub("...", "-", df$Depth, fixed = TRUE)
df$Depth[df$Depth=="40-"] <- "40-80"
df$Depth[df$Depth=="0-10"] <- "0-10 cm"
df$Depth[df$Depth=="10-20"] <- "10-20 cm"
df$Depth[df$Depth=="20-30"] <- "20-30 cm"
df$Depth[df$Depth=="30-40"] <- "30-40 cm"
df$Depth[df$Depth=="40-80"] <- "40-80 cm"
FG <- ggplot(df, aes(x = Depth ,y = Abundance, fill = species)) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = my_colour[["FUNGuild"]]) + facet_grid (cols = vars(sample_type), labeller=sampletype_labeller) + geom_bar(stat="identity", position="fill") + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=14),
axis.title = element_text(size=18),
legend.text = element_text(size=16),
legend.title = element_text(size=18),
legend.spacing.y = unit(0, 'cm'),
legend.key.size = unit(0.8, 'cm'),
title = element_text(size=18))+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill = guide_legend(nrow = 4, title="")) + theme(strip.text.x = element_text(size = 22)) +
ylab(label = "Relative abundance") + theme(legend.position="top") + xlab("Depth")
FG
ps_RA <- microbiome::transform(ps, 'compositional')
ps_RA.class <- aggregate_rare(ps_RA, level = 'class', detection = 3/100, prevalence = 3/140, include.lowest = TRUE)
ps_RA.class
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 16 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 16 taxa by 2 taxonomic ranks ]
#create data table
ps_RA.class_df <- psmelt(ps_RA.class)
#Create color palette
#set color palette from RColorBrewer
# Define the number of colors you want
library("RColorBrewer") # nice color options
nb.cols = length(unique((as.data.frame(ps_RA.class@tax_table))$class))
cbbPalette <- colorRampPalette(brewer.pal(12, "Set3"))(nb.cols)
sampletype_names <- list(
'forest' = "forest", 'meadow' = "meadow", 'organic' = "organic", 'conventional' = "conventional"
)
sampletype_labeller <- function(variable,value){
return(sampletype_names[value])
}
# check unique values for class
unique(ps_RA.class_df$class)
## [1] "Leotiomycetes" "Agaricomycetes"
## [3] "Dothideomycetes" "Sordariomycetes"
## [5] "Archaeosporomycetes" "Mortierellomycetes"
## [7] "Geoglossomycetes" "Tremellomycetes"
## [9] "Ascomycota_unclassified" "Glomeromycetes"
## [11] "Microbotryomycetes" "Pezizomycetes"
## [13] "Other" "Eurotiomycetes"
## [15] "Basidiomycota_unclassified" "Orbiliomycetes"
is.factor(ps_RA.class_df$class)
## [1] FALSE
ps_RA.class_df$class <- as.factor(ps_RA.class_df$class)
levels(ps_RA.class_df$class)
## [1] "Agaricomycetes" "Archaeosporomycetes"
## [3] "Ascomycota_unclassified" "Basidiomycota_unclassified"
## [5] "Dothideomycetes" "Eurotiomycetes"
## [7] "Geoglossomycetes" "Glomeromycetes"
## [9] "Leotiomycetes" "Microbotryomycetes"
## [11] "Mortierellomycetes" "Orbiliomycetes"
## [13] "Other" "Pezizomycetes"
## [15] "Sordariomycetes" "Tremellomycetes"
ps_RA.class_df$class <- relevel(ps_RA.class_df$class, "Other")
cbbPalette <- c("#b2b2b2", "#8DD3C7", "#FFED6F", "#CAAEC5", "#F68378", "#8D6942", "#F3B962", "#BCD868", "#6E99BE", "#F0D1E1", "#C191C2", "#FFFFC6", "darkgreen", "#D0D9CD", "#8BC081", "#FF8DB5")
# plot with detection = 1/100, prevalence = 2/100
#Create a plot
classF <- ggplot(ps_RA.class_df, aes(x = depth ,y = Abundance, fill = class)) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + facet_grid (cols = vars(sample_type), labeller=sampletype_labeller) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=14),
axis.title = element_text(size=18),
legend.text = element_text(size=16),
legend.title = element_text(size=18),
legend.spacing.y = unit(0, 'cm'),
legend.key.size = unit(0.8, 'cm'),
title = element_text(size=18))+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill = guide_legend(nrow = 6, title="")) + theme(strip.text.x = element_text(size = 22)) +
ylab(label = "Relative abundance") + theme(legend.position="top") +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x = element_blank())
classF
ps_RA_phyla_aggr <- aggregate_rare(ps_RA, level = 'phylum', detection = 2/100, prevalence = 2/140)
ps_RA_phyla_aggr
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 6 taxa by 2 taxonomic ranks ]
#create data table
ps_RA_phyla_df <- psmelt(ps_RA_phyla_aggr)
#Create color palette
cbbPalette <- c("#666666","#1B9E77", "#D95F02", "#E7298A", "#7570B3", "#66A61E")
sampletype_names <- list(
'forest' = "forest", 'meadow' = "meadow", 'organic' = "organic", 'conventional' = "conventional"
)
sampletype_labeller <- function(variable,value){
return(sampletype_names[value])
}
# check unique values for phylum
unique(ps_RA_phyla_df$phylum)
## [1] "Ascomycota" "Basidiomycota" "Glomeromycota"
## [4] "Mortierellomycota" "Rozellomycota" "Other"
# [1] "Ascomycota" "Basidiomycota" "Glomeromycota" "Mortierellomycota"
# [5] "Rozellomycota" "Other"
ps_RA_phyla_df$phylum <- as.factor(ps_RA_phyla_df$phylum)
levels(ps_RA_phyla_df$phylum)
## [1] "Ascomycota" "Basidiomycota" "Glomeromycota"
## [4] "Mortierellomycota" "Other" "Rozellomycota"
ps_RA_phyla_df$phylum <- factor(ps_RA_phyla_df$phylum, levels = c("Other","Ascomycota", "Basidiomycota", "Glomeromycota", "Mortierellomycota", "Rozellomycota"))
levels(ps_RA_phyla_df$phylum)
## [1] "Other" "Ascomycota" "Basidiomycota"
## [4] "Glomeromycota" "Mortierellomycota" "Rozellomycota"
#Create a plot
phylumF <- ggplot(ps_RA_phyla_df, aes(x = depth ,y = Abundance, fill = phylum)) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + facet_grid (cols = vars(sample_type), labeller=sampletype_labeller) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=14),
axis.title = element_text(size=18),
legend.text = element_text(size=16),
legend.title = element_text(size=18),
legend.spacing.y = unit(0, 'cm'),
legend.key.size = unit(0.8, 'cm'),
title = element_text(size=18))+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill = guide_legend(title="")) + theme(strip.text.x = element_text(size = 22)) +
ylab(label = "Relative abundance") + theme(legend.position="top") +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x = element_blank())
phylumF
library(ggplotify)
left <- ggarrange(phylumF, classF , FG,
labels = c("A", "B", "C"),
ncol = 1, nrow = 3, heights = c(1.1, 1.5, 1.5))
left
right <- ggarrange(p2, p3,
labels = c("D", "E"),
ncol = 1, nrow = 2, heights = c(2.7, 1.4))
right
figure <- ggarrange(left, right,
ncol = 2, nrow = 1, heights = c(1, 1), widths = c(1, 1))
figure
Saved with width 2200 and height 2600
Note! While doing tests, always check the homogeneity of variance result and decide based on that which later test result to use (Anova and Tukey or Kruskal and Wilcoxon)
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
Note:
# remove species
z <- as.data.frame(tax_table(ps_FG))
tax <- z[, 1:9]
# rename FUNGuild to species
colnames(tax)[3] <- "species"
tax <- tax %>% as.matrix()
# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)
x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 12 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 12 taxa by 2 taxonomic ranks ]
# 12 taxa and 140 samples
# remove "Unknown"
#allTaxa = taxa_names(x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#x <- prune_taxa(myTaxa, x)
#x
# 11 taxa and 140 samples
x_RA <- transform(x, 'compositional')
#create data table
FG_df <- psmelt(x_RA)
unique(FG_df$OTU)
## [1] "Saprotroph" "Saprotroph-Symbiotroph"
## [3] "Unknown" "Ectomycorrhizal"
## [5] "Arbuscular Mycorrhizal" "Pathotroph-Saprotroph"
## [7] "Pathotroph-Symbiotroph" "Endophyte"
## [9] "Pathotroph-Saprotroph-Symbiotroph" "Other Pathotroph"
## [11] "Plant Pathogen" "Other Symbiotroph"
taxa <- "Plant Pathogen"
# Construct a data.frame with the selected
# taxonomic group and grouping
# relative
df <- filter(FG_df, OTU == taxa)
library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 6.659 0.0003143 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
not homogenous
# perform the Kruskal test
kruskal.test(Abundance ~ sample_type, data = df)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 27.409, df = 3, p-value = 4.833e-06
pairwise.wilcox.test(df$Abundance, df$sample_type,
p.adjust.method = "BH")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.12765 - -
## organic 0.00090 0.00090 -
## conventional 0.00074 0.00074 0.65964
##
## P value adjustment method: BH
x <- df %>%
group_by(sample_type) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 4 × 3
## sample_type mean se
## <fct> <dbl> <dbl>
## 1 forest 0.00392 0.00110
## 2 meadow 0.0120 0.00406
## 3 organic 0.0326 0.00450
## 4 conventional 0.0291 0.00399
unique(FG_df$OTU)
## [1] "Saprotroph" "Saprotroph-Symbiotroph"
## [3] "Unknown" "Ectomycorrhizal"
## [5] "Arbuscular Mycorrhizal" "Pathotroph-Saprotroph"
## [7] "Pathotroph-Symbiotroph" "Endophyte"
## [9] "Pathotroph-Saprotroph-Symbiotroph" "Other Pathotroph"
## [11] "Plant Pathogen" "Other Symbiotroph"
taxa <- "Endophyte"
# Construct a data.frame with the selected
# taxonomic group and grouping
# relative
df <- filter(FG_df, OTU == taxa)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.0924 0.3547
## 136
# perform the Kruskal test
kruskal.test(Abundance ~ sample_type, data = df)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 17.125, df = 3, p-value = 0.0006663
pairwise.wilcox.test(df$Abundance, df$sample_type,
p.adjust.method = "BH")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.4489 - -
## organic 0.0090 0.0084 -
## conventional 0.0144 0.0123 0.9508
##
## P value adjustment method: BH
x <- df %>%
group_by(sample_type) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 4 × 3
## sample_type mean se
## <fct> <dbl> <dbl>
## 1 forest 0.00956 0.00571
## 2 meadow 0.0113 0.00320
## 3 organic 0.0299 0.00821
## 4 conventional 0.0243 0.00641
# remove species
z <- as.data.frame(tax_table(ps_FG))
tax <- z[, 1:9]
# rename trophicmode to species
colnames(tax)[1] <- "species"
tax <- tax %>% as.matrix()
# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)
x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 140 samples
# remove "Unknown"
#allTaxa = taxa_names(x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#x <- prune_taxa(myTaxa, x)
#x
# 7 taxa and 140 samples
x_RA <- transform(x, 'compositional')
#create data table
FG_df <- psmelt(x_RA)
unique(FG_df$species)
## [1] "Saprotroph" "Saprotroph-Symbiotroph"
## [3] "Unknown" "Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Pathotroph-Symbiotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph"
FG_df$species <- as.factor(FG_df$species)
for (i in levels(FG_df$species)) {
df <- filter(FG_df, species == i)
print(i)
result = leveneTest(Abundance ~ sample_type, df)
print(result)
k <- kruskal.test(Abundance ~ sample_type, data = df)
print(k)
w <- pairwise.wilcox.test(df$Abundance, df$sample_type,
p.adjust.method = "BH")
print(w)
res.aov <- aov(Abundance ~ sample_type, data = df)
aov <- summary(res.aov)
print(aov)
tukey <- TukeyHSD(res.aov)
print(tukey)
}
## [1] "Pathotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 14.948 1.826e-08 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 24.347, df = 3, p-value = 2.115e-05
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.0506 - -
## organic 0.0021 0.0042 -
## conventional 0.0012 0.0012 0.3715
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.1020 0.03400 11.43 9.88e-07 ***
## Residuals 136 0.4046 0.00297
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 0.01618704 -0.02676495 0.05913903 0.7609271
## organic-forest 0.05980597 0.01685398 0.10275796 0.0023053
## conventional-forest 0.07068982 0.02839362 0.11298602 0.0001562
## organic-meadow 0.04361893 0.01189678 0.07534108 0.0026909
## conventional-meadow 0.05450278 0.02367439 0.08533116 0.0000565
## conventional-organic 0.01088385 -0.01994454 0.04171223 0.7951192
##
## [1] "Pathotroph-Saprotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.6236 0.6009
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 16.639, df = 3, p-value = 0.0008384
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.97764 - -
## organic 0.48530 0.22650 -
## conventional 0.04408 0.00032 0.04408
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.062 0.02067 2.098 0.103
## Residuals 136 1.340 0.00985
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -0.0272793473 -0.105437495 0.05087880 0.8006894
## organic-forest 0.0001389386 -0.078019209 0.07829709 1.0000000
## conventional-forest 0.0268088430 -0.050155981 0.10377367 0.8016557
## organic-meadow 0.0274182860 -0.030305332 0.08514190 0.6054541
## conventional-meadow 0.0540881903 -0.002009083 0.11018546 0.0631532
## conventional-organic 0.0266699044 -0.029427369 0.08276718 0.6047453
##
## [1] "Pathotroph-Saprotroph-Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 16.618 2.955e-09 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 12.707, df = 3, p-value = 0.005316
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.148 - -
## organic 0.303 0.023 -
## conventional 0.442 0.012 0.159
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.05287 0.017622 9.025 1.72e-05 ***
## Residuals 136 0.26556 0.001953
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -0.02117396 -0.055973028 0.01362511 0.3919150
## organic-forest 0.02920514 -0.005593926 0.06400421 0.1331853
## conventional-forest -0.00451020 -0.038777956 0.02975755 0.9861429
## organic-meadow 0.05037910 0.024678286 0.07607992 0.0000067
## conventional-meadow 0.01666376 -0.008312946 0.04164046 0.3095042
## conventional-organic -0.03371534 -0.058692048 -0.00873864 0.0033576
##
## [1] "Pathotroph-Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.342 0.2634
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 10.052, df = 3, p-value = 0.01813
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.104 - -
## organic 0.736 0.104 -
## conventional 0.736 0.023 0.394
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.0032 0.001068 1.35 0.261
## Residuals 136 0.1076 0.000791
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 0.0110798586 -0.01106945 0.033229164 0.5637908
## organic-forest 0.0082373461 -0.01391196 0.030386651 0.7682090
## conventional-forest 0.0003225014 -0.02148863 0.022133630 0.9999795
## organic-meadow -0.0028425125 -0.01920086 0.013515834 0.9691437
## conventional-meadow -0.0107573571 -0.02665481 0.005140098 0.2971540
## conventional-organic -0.0079148447 -0.02381230 0.007982610 0.5676954
##
## [1] "Saprotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 4.0842 0.008198 **
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 31.546, df = 3, p-value = 6.522e-07
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 5.2e-05 - -
## organic 0.161 9.4e-06 -
## conventional 0.057 8.4e-05 0.388
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 1.746 0.5819 15.82 7.01e-09 ***
## Residuals 136 5.003 0.0368
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 0.30830047 0.15726083 0.45934011 0.0000026
## organic-forest 0.06074104 -0.09029860 0.21178068 0.7226932
## conventional-forest 0.10108566 -0.04764790 0.24981922 0.2933518
## organic-meadow -0.24755943 -0.35910960 -0.13600926 0.0000003
## conventional-meadow -0.20721481 -0.31562209 -0.09880753 0.0000116
## conventional-organic 0.04034462 -0.06806266 0.14875190 0.7678339
##
## [1] "Saprotroph-Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.4348 0.06754 .
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 0.10016, df = 3, p-value = 0.9918
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.99 - -
## organic 0.99 0.99 -
## conventional 0.99 0.99 0.99
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.0623 0.02076 0.921 0.433
## Residuals 136 3.0669 0.02255
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -0.03010025 -0.1483602 0.08815971 0.9111192
## organic-forest -0.00292569 -0.1211857 0.11533427 0.9999039
## conventional-forest -0.05189731 -0.1683517 0.06455705 0.6535488
## organic-meadow 0.02717456 -0.0601662 0.11451533 0.8499874
## conventional-meadow -0.02179705 -0.1066770 0.06308292 0.9089813
## conventional-organic -0.04897162 -0.1338516 0.03590835 0.4399103
##
## [1] "Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 14.22 4.099e-08 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 13.84, df = 3, p-value = 0.003131
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.0022 - -
## organic 0.0028 0.5867 -
## conventional 0.0026 0.8775 0.8775
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.5949 0.19829 13.83 6.34e-08 ***
## Residuals 136 1.9500 0.01434
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -0.21179677 -0.30609492 -0.11749863 0.0000002
## organic-forest -0.19023387 -0.28453201 -0.09593572 0.0000034
## conventional-forest -0.21889086 -0.31174926 -0.12603247 0.0000001
## organic-meadow 0.02156291 -0.04808089 0.09120671 0.8518287
## conventional-meadow -0.00709409 -0.07477570 0.06058752 0.9928863
## conventional-organic -0.02865700 -0.09633860 0.03902461 0.6893331
##
## [1] "Unknown"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.9805 0.1198
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 13.761, df = 3, p-value = 0.003249
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.62012 - -
## organic 0.62012 0.15804 -
## conventional 0.26905 0.00055 0.15804
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.328 0.10930 3.305 0.0223 *
## Residuals 136 4.498 0.03307
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -0.04521703 -0.18843861 0.09800454 0.8443579
## organic-forest 0.03503112 -0.10819045 0.17825269 0.9201414
## conventional-forest 0.07639155 -0.06464330 0.21742640 0.4960123
## organic-meadow 0.08024815 -0.02552799 0.18602429 0.2031930
## conventional-meadow 0.12160859 0.01881266 0.22440452 0.0133339
## conventional-organic 0.04136044 -0.06143550 0.14415637 0.7223811
x <- FG_df %>% group_by(species, sample_type) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 32 × 4
## # Groups: species [8]
## species sample_type mean se
## <fct> <fct> <dbl> <dbl>
## 1 Pathotroph forest 0.0104 0.00378
## 2 Pathotroph meadow 0.0266 0.00556
## 3 Pathotroph organic 0.0702 0.00985
## 4 Pathotroph conventional 0.0811 0.0101
## 5 Pathotroph-Saprotroph forest 0.0841 0.0332
## 6 Pathotroph-Saprotroph meadow 0.0568 0.0102
## 7 Pathotroph-Saprotroph organic 0.0842 0.0153
## 8 Pathotroph-Saprotroph conventional 0.111 0.0171
## 9 Pathotroph-Saprotroph-Symbiotroph forest 0.0315 0.0153
## 10 Pathotroph-Saprotroph-Symbiotroph meadow 0.0104 0.00266
## # ℹ 22 more rows
ps_x <- subset_samples(ps_FG, sample_type=="forest" & depth=="40...")
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 3 samples ]
## sample_data() Sample Data: [ 3 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
ps_FG_Tm <- aggregate_rare(ps_x, level = "trophicMode", detection = 0, prevalence = 0)
ps_FG_Tm
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8 taxa and 3 samples ]
## sample_data() Sample Data: [ 3 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 15 samples
# remove "Unknown"
#allTaxa = taxa_names(ps_FG_Tm)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#ps_FG_Tm <- prune_taxa(myTaxa, ps_FG_Tm)
#ps_FG_Tm
# 7 taxa and 15 samples
ps_FG_Tm_RA <- microbiome::transform(ps_FG_Tm, "compositional")
FG_df <- psmelt(ps_FG_Tm_RA)
x <- FG_df %>%
group_by(OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 8 × 3
## OTU mean se
## <chr> <dbl> <dbl>
## 1 Pathotroph 0.00245 0.00245
## 2 Pathotroph-Saprotroph 0.172 0.159
## 3 Pathotroph-Saprotroph-Symbiotroph 0.00880 0.00880
## 4 Pathotroph-Symbiotroph 0 0
## 5 Saprotroph 0.00920 0.00898
## 6 Saprotroph-Symbiotroph 0.0130 0.0129
## 7 Symbiotroph 0.464 0.0574
## 8 Unknown 0.331 0.166
# remove species
z <- as.data.frame(tax_table(ps_FG))
tax <- z[, 1:9]
# rename trophic mode to species
colnames(tax)[1] <- "species"
tax <- tax %>% as.matrix()
# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)
x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 140 samples
# remove "Unknown"
#allTaxa = taxa_names(x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#x <- prune_taxa(myTaxa, x)
#x
# 2 taxa and 140 samples
x_RA <- transform(x, 'compositional')
#create data table
FG_df <- psmelt(x_RA)
# remove forest
FG_df <- subset(FG_df, sample_type!="forest" | sample_type!="meadow")
unique(FG_df$species)
## [1] "Saprotroph" "Saprotroph-Symbiotroph"
## [3] "Unknown" "Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Pathotroph-Symbiotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph"
df <- filter(FG_df, species == "Symbiotroph")
result = leveneTest(Abundance ~ depth, df)
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 8.1269 6.674e-06 ***
## 135
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 21.947, df = 4, p-value = 0.0002054
w <- pairwise.wilcox.test(df$Abundance, df$depth,
p.adjust.method = "BH")
print(w)
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$depth
##
## 0...10 10...20 20...30 30...40
## 10...20 0.0038 - - -
## 20...30 6.6e-05 0.0134 - -
## 30...40 0.0009 0.0118 0.2752 -
## 40... 0.2752 0.5454 0.7618 0.6229
##
## P value adjustment method: BH
x <- df %>% group_by(depth) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 5 × 3
## depth mean se
## <chr> <dbl> <dbl>
## 1 0...10 0.0157 0.00272
## 2 10...20 0.0279 0.00575
## 3 20...30 0.0560 0.0123
## 4 30...40 0.151 0.0413
## 5 40... 0.126 0.0305
# remove species
z <- as.data.frame(tax_table(ps_FG))
tax <- z[, 1:9]
# rename trophic mode to species
colnames(tax)[1] <- "species"
tax <- tax %>% as.matrix()
# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)
x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 140 samples
# remove "Unknown"
#allTaxa = taxa_names(x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#x <- prune_taxa(myTaxa, x)
#x
# 2 taxa and 140 samples
x_RA <- transform(x, 'compositional')
#create data table
FG_df <- psmelt(x_RA)
# remove forest
FG_df <- subset(FG_df, sample_type!="forest" & sample_type!="meadow")
unique(FG_df$species)
## [1] "Saprotroph-Symbiotroph" "Saprotroph"
## [3] "Unknown" "Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Pathotroph-Symbiotroph"
## [7] "Pathotroph" "Pathotroph-Saprotroph-Symbiotroph"
df <- filter(FG_df, species == "Saprotroph")
result = leveneTest(Abundance ~ depth, df)
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 2.5144 0.04792 *
## 80
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 24.835, df = 4, p-value = 5.43e-05
w <- pairwise.wilcox.test(df$Abundance, df$depth,
p.adjust.method = "BH")
print(w)
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$depth
##
## 0...10 10...20 20...30 30...40
## 10...20 0.73386 - - -
## 20...30 0.25672 0.25672 - -
## 30...40 0.00014 0.00014 0.00289 -
## 40... 0.02354 0.01348 0.16448 0.73386
##
## P value adjustment method: BH
x <- df %>% group_by(depth) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 5 × 3
## depth mean se
## <chr> <dbl> <dbl>
## 1 0...10 0.308 0.0280
## 2 10...20 0.311 0.0173
## 3 20...30 0.251 0.0331
## 4 30...40 0.112 0.0310
## 5 40... 0.199 0.0633
# remove species
z <- as.data.frame(tax_table(ps_FG))
tax <- z[, 1:9]
# rename trophic mode to species
colnames(tax)[1] <- "species"
tax <- tax %>% as.matrix()
# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)
x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 140 samples
# remove "Unknown"
#allTaxa = taxa_names(x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#x <- prune_taxa(myTaxa, x)
#x
# 2 taxa and 140 samples
x_RA <- transform(x, 'compositional')
#create data table
FG_df <- psmelt(x_RA)
unique(FG_df$species)
## [1] "Saprotroph" "Saprotroph-Symbiotroph"
## [3] "Unknown" "Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Pathotroph-Symbiotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph"
FG_df <- filter(FG_df, sample_type != "forest")
FG_df <- droplevels(FG_df)
# make factor
FG_df$species <- as.factor(FG_df$species)
FG_df$depth <- as.factor(FG_df$depth)
#create data frame with 0 rows and 6 columns
df_test <- data.frame(matrix(ncol = 6))
#provide column names
colnames(df_test) <- c('sample_type', 'species', 'depth', 'LevenesP', 'ANOVAsOrKrusalsP', 'mean_se_cld')
for (i in levels(FG_df$depth)) {
for (j in levels(FG_df$species)) {
df <- filter(FG_df, depth == i & species == j)
levene <- leveneTest(Abundance ~ sample_type, df)
if (levene[1,3]>0.05){
res.aov <- aov(Abundance ~ sample_type, data = df)
res.aov2 <- summary(res.aov)
res.aov2 <- res.aov2[[1]]
df.res.aov <- data.frame(matrix(ncol = 3, nrow =3))
colnames(df.res.aov) <- c('sample_type', 'LevenesP', 'ANOVAsOrKrusalsP')
df.res.aov$sample_type <- levels(FG_df$sample_type)
df.res.aov$LevenesP <- levene[1,3]
df.res.aov$ANOVAsOrKrusalsP <- res.aov2[1,5]
df.res.aov$LevenesP <- sprintf("%.3f", round(df.res.aov$LevenesP, digits = 3))
df.res.aov$ANOVAsOrKrusalsP <- sprintf("%.3f", round(df.res.aov$ANOVAsOrKrusalsP, digits = 3))
tukey <- glht(res.aov, linfct=mcp(sample_type="Tukey"))
cld <- cld(tukey)
cld <- cld[["mcletters"]][["Letters"]]
cld <- as.data.frame(cld)
cld$sample_type <- row.names(cld)
row.names(cld) <- NULL
} else {
res.aov2 <- kruskal.test(Abundance ~ sample_type, data = df)
df.res.aov <- data.frame(matrix(ncol = 3, nrow =3))
colnames(df.res.aov) <- c('sample_type', 'LevenesP', 'ANOVAsOrKrusalsP')
df.res.aov$sample_type <- levels(FG_df$sample_type)
df.res.aov$LevenesP <- levene[1,3]
df.res.aov$ANOVAsOrKrusalsP <- res.aov2[["p.value"]]
df.res.aov$LevenesP <- sprintf("%.3f", round(df.res.aov$LevenesP, digits = 3))
df.res.aov$ANOVAsOrKrusalsP <- sprintf("%.3f", round(df.res.aov$ANOVAsOrKrusalsP, digits = 3))
wilcox.res <- pairwise.wilcox.test(df$Abundance, df$sample_type,
p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
cld = fullPTable(wilcox.res)
cld[is.na(cld)] <- 0
cld <- multcompLetters(cld)
cld <- cld[["Letters"]]
cld <- as.data.frame(cld)
cld$sample_type <- row.names(cld)
row.names(cld) <- NULL
}
x <- df %>%
dplyr::group_by(sample_type) %>%
dplyr::summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x$mean <- sprintf("%.3f", round(x$mean, digits = 3))
x$se <- sprintf("%.3f", round(x$se, digits = 3))
x <- cbind(species = 0, x)
x$species <- j
x$depth <- i
output <- merge(x, df.res.aov, by='sample_type')
output <- merge(output, cld, by='sample_type')
output$mean_se = paste(output$mean, output$se, sep="±")
output$mean_se_cld = paste(output$mean_se, output$cld, sep="")
output <- output[, -c(3,4,8,9)]
df_test <- rbind(df_test, output)
}
}
df_test <- na.omit(df_test)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
#write.csv2(df_test, file = "test.csv")
Note all correlations are done without forest!
library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
# this aggregation makes NAs into "Unknown"
ps_FG_x <- aggregate_rare(ps_FG, level = 'trophicMode', detection = 0/100, prevalence = 0/140)
ps_FG_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa
# remove "Unknown"
#allTaxa = taxa_names(ps_FG_x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#ps_FG_x_pruned <- prune_taxa(myTaxa, ps_FG_x)
#ps_FG_x_pruned
# 7 taxa
FG_nf <- subset_samples(ps_FG_x, sample_type != "forest")
FG_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 8 taxa by 1 taxonomic ranks ]
FG_RA_nf <- transform(FG_nf, "compositional")
FG_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 8 taxa by 1 taxonomic ranks ]
df <- psmelt(FG_RA_nf)
df$year <- "2019"
# sum the values if same genus: Sum by Group Based on dplyr Package
x <- df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:6)
y
## # A tibble: 6 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Saprotroph 0.309 0.0199
## 2 2019 Unknown 0.307 0.0166
## 3 2019 Saprotroph-Symbiotroph 0.147 0.0129
## 4 2019 Pathotroph-Saprotroph 0.0851 0.00869
## 5 2019 Pathotroph 0.0602 0.00551
## 6 2019 Symbiotroph 0.0531 0.00847
taxa <- y$OTU
for (i in taxa) {
df_x <- subset(df, OTU == i)
print(i)
x <- cor.test(df_x$Abundance, df_x$depth_numerical, method = "spearman", na.rm = TRUE)
print(x)
}
## [1] "Saprotroph"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 473587, p-value = 9.808e-08
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.4549534
##
## [1] "Unknown"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 187721, p-value = 8.737e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4232854
##
## [1] "Saprotroph-Symbiotroph"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 360251, p-value = 0.236
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.106762
##
## [1] "Pathotroph-Saprotroph"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 304017, p-value = 0.4646
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.06600116
##
## [1] "Pathotroph"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 540182, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.6595454
##
## [1] "Symbiotroph"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 241758, p-value = 0.003775
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2572713
ps_nf <- subset_samples(ps, sample_type != "forest")
ps_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf <- transform(ps_nf, "compositional")
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf_phy <- aggregate_rare(ps_RA_nf, level = "phylum", detection = 0.0, prevalence = 0.0)
ps_RA_nf_phy
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 14 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 14 taxa by 1 taxonomic ranks ]
df <- psmelt(ps_RA_nf_phy)
df$year <- "2019"
x <- df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Ascomycota 0.692 0.0151
## 2 2019 Basidiomycota 0.199 0.0134
## 3 2019 Mortierellomycota 0.0793 0.00956
## 4 2019 Glomeromycota 0.0264 0.00683
## 5 2019 Chytridiomycota 0.00209 0.000362
taxa <- y$OTU
for (i in taxa) {
df_x <- subset(df, OTU == i)
print(i)
x <- cor.test(df_x$Abundance, df_x$depth_numerical, method = "spearman", na.rm = TRUE)
print(x)
}
## [1] "Ascomycota"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 298097, p-value = 0.3506
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.08418676
##
## [1] "Basidiomycota"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 491192, p-value = 1.347e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.5090399
##
## [1] "Mortierellomycota"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 245027, p-value = 0.005442
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.24723
##
## [1] "Glomeromycota"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 257422, p-value = 0.01924
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2091478
##
## [1] "Chytridiomycota"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 569974, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.7510735
ps_nf <- subset_samples(ps, sample_type != "forest")
ps_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf <- transform(ps_nf, "compositional")
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf_cla <- aggregate_rare(ps_RA_nf, level = "class", detection = 0.0, prevalence = 0.0)
ps_RA_nf_cla
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 65 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 65 taxa by 2 taxonomic ranks ]
df <- psmelt(ps_RA_nf_cla)
df$year <- "2019"
x <- df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Leotiomycetes 0.291 0.0218
## 2 2019 Sordariomycetes 0.185 0.0131
## 3 2019 Dothideomycetes 0.137 0.0140
## 4 2019 Tremellomycetes 0.132 0.0111
## 5 2019 Mortierellomycetes 0.0793 0.00956
taxa <- y$OTU
for (i in taxa) {
df_x <- subset(df, OTU == i)
print(i)
x <- cor.test(df_x$Abundance, df_x$depth_numerical, method = "spearman", na.rm = TRUE)
print(x)
}
## [1] "Leotiomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 139957, p-value = 3.966e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5700243
##
## [1] "Sordariomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 426742, p-value = 0.0004147
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.3110364
##
## [1] "Dothideomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 548039, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.6836843
##
## [1] "Tremellomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 558143, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.7147252
##
## [1] "Mortierellomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 245027, p-value = 0.005442
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.24723
ps_nf <- subset_samples(ps, sample_type != "forest")
ps_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf <- transform(ps_nf, "compositional")
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf_gen <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0.0, prevalence = 0.0)
ps_RA_nf_gen
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 895 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 895 taxa by 2 taxonomic ranks ]
df <- psmelt(ps_RA_nf_gen)
Note! Exclude the obes not classified at genus level!
df$year <- "2019"
x <- df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:6)
print(y)
## # A tibble: 6 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Leotiomycetes_unclassified 0.109 0.0168
## 2 2019 Saitozyma 0.0664 0.00702
## 3 2019 Pseudeurotium 0.0533 0.0122
## 4 2019 Paraphaeosphaeria 0.0516 0.0110
## 5 2019 Mortierella 0.0478 0.00662
## 6 2019 Solicoccozyma 0.0463 0.00584
We don’t want to test for Leotiomycetes_unclassified
taxa <- y$OTU
taxa[! taxa %in% c("Leotiomycetes_unclassified")]
## [1] "Saitozyma" "Pseudeurotium" "Paraphaeosphaeria"
## [4] "Mortierella" "Solicoccozyma"
for (i in taxa) {
df_x <- subset(df, OTU == i)
print(i)
x <- cor.test(df_x$Abundance, df_x$depth_numerical, method = "spearman", na.rm = TRUE)
print(x)
}
## [1] "Leotiomycetes_unclassified"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 186445, p-value = 6.745e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4272047
##
## [1] "Saitozyma"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 549794, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.6890738
##
## [1] "Pseudeurotium"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 329787, p-value = 0.8841
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.01316959
##
## [1] "Paraphaeosphaeria"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 565995, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.7388476
##
## [1] "Mortierella"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 239975, p-value = 0.003073
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2627505
##
## [1] "Solicoccozyma"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 545184, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.6749137
library('phyloseq')
library("dplyr")
library("tibble")
library("microbiome")
library("tibble")
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)
OTU <- as.data.frame(otu_table(ps))
OTU <- t(OTU)
# calculate reads per sample:
OTU2 <- OTU
OTU2 <- as.data.frame(OTU2)
OTU2$reads <- rowSums(OTU2)
# add to meta
meta$reads <- OTU2$reads
OTU <- as.matrix(OTU)
#convert an abundance matrix to a presence-absence matrix. (make any number greater than 0 into a 1)
OTU[OTU > 0] <- 1 #converts from abundance to P/A
OTU <- as.data.frame(OTU)
# calculate number of different otus in a sample
OTU$OTUs <- rowSums(OTU)
# add to meta
meta$OTUs <- OTU$OTUs
rm(OTU)
# save new meta
sample_data(ps) <- sample_data(meta)
#save(ps, file = 'ps_FINAL')
mean(meta$OTUs)
## [1] 1480.243
# 1480.243
# mean values of OTUs in soil layers
x <- meta %>%
dplyr::group_by(depth) %>%
dplyr::summarise(OTUs_mean = mean(OTUs, na.rm = TRUE), OTUs_se = (sd(OTUs, na.rm = TRUE)/sqrt(length((OTUs)))))
print(paste("how many OTUs on average in each soil layer"))
## [1] "how many OTUs on average in each soil layer"
print(x)
## # A tibble: 5 × 3
## depth OTUs_mean OTUs_se
## <chr> <dbl> <dbl>
## 1 0...10 2184. 183.
## 2 10...20 2495. 134.
## 3 20...30 1900. 187.
## 4 30...40 607. 86.0
## 5 40... 215. 39.9
y <- meta %>%
dplyr::group_by(depth) %>%
dplyr::summarise(reads_mean = mean(reads, na.rm = TRUE), reads_se = (sd(reads, na.rm = TRUE)/sqrt(length((reads)))))
print(paste("how many reads on average in each soil layer"))
## [1] "how many reads on average in each soil layer"
print(y)
## # A tibble: 5 × 3
## depth reads_mean reads_se
## <chr> <dbl> <dbl>
## 1 0...10 80957. 4871.
## 2 10...20 95172. 6279.
## 3 20...30 97826. 8155.
## 4 30...40 102635. 15860.
## 5 40... 35534. 8482.
xy <- left_join(x, y, by="depth")
# save
#write.csv2(xy, file = "OTUs_and_reads_in_depths.csv", row.names = FALSE)
library("metagMisc")
ps_x <- phyloseq_average(
ps,
avg_type = "arithmetic",
group = "depth",
drop_group_zero = FALSE,
verbose = FALSE,
progress = NULL
)
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 5 samples ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# 20610 taxa and 5 samples
OTU <- as.data.frame(otu_table(ps_x))
OTU <- t(OTU)
OTU <- as.matrix(OTU)
#convert an abundance matrix to a presence-absence matrix. (make any number greater than 0 into a 1)
OTU[OTU > 0] <- 1 #converts from abundance to P/A
OTU <- as.data.frame(OTU)
# calculate number of different otus in a sample
OTU$OTUs <- rowSums(OTU)
OTU$depth <- rownames(OTU)
OTU <- OTU[, 20611:20612]
print(paste("how many OTUs in total in each soil layer"))
## [1] "how many OTUs in total in each soil layer"
print(OTU)
## OTUs depth
## 0...10 14737 0...10
## 10...20 16268 10...20
## 20...30 14763 20...30
## 30...40 5367 30...40
## 40... 2563 40...
library(car)
# Using leveneTest()
result = leveneTest(OTUs ~ depth, meta)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 16.055 8.907e-11 ***
## 135
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# not homogenous
# perform the Kruskal test
kruskal.test(OTUs ~ depth, data = meta)
##
## Kruskal-Wallis rank sum test
##
## data: OTUs by depth
## Kruskal-Wallis chi-squared = 91.593, df = 4, p-value < 2.2e-16
pairwise.wilcox.test(meta$OTUs, meta$depth,
p.adjust.method = "BH")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: meta$OTUs and meta$depth
##
## 0...10 10...20 20...30 30...40
## 10...20 0.30647 - - -
## 20...30 0.33226 0.02782 - -
## 30...40 2.4e-09 8.9e-10 4.1e-07 -
## 40... 8.9e-10 8.9e-10 1.7e-09 0.00065
##
## P value adjustment method: BH
library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)
x_sub <- subset_taxa(ps_FG, trophicMode %in% c("Saprotroph"))
x_sub
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4842 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 4842 taxa by 11 taxonomic ranks ]
# 4842 taxa and 140 samples
# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")
# get the data into data frame
richness_df <- richness$data
# the observed richness is in the column "value", lets change this to saprotroph_richness
colnames(richness_df)[30] <- "saprotroph_richness"
# and lets remove the "variable" and "se" columns
richness_df <- subset(richness_df, select = -variable)
richness_df <- subset(richness_df, select = -se)
FG_richness <- richness_df
x_sub <- subset_taxa(ps_FG, trophicMode %in% c("Symbiotroph"))
x_sub
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 784 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 784 taxa by 11 taxonomic ranks ]
# 784 taxa and 140 samples
# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")
# get the data
richness_df <- richness$data
# the observed richness is in the column "value", lets change this to symbiotroph_richness
colnames(richness_df)[30] <- "symbiotroph_richness"
# and lets remove extra columns
richness_df <- richness_df[, c(1, 30)]
# combine
FG_richness <- dplyr::left_join(FG_richness, richness_df, by = "sampleID")
FG_richness <- subset(FG_richness, select = -samples)
# sampleID into rownames
rownames(FG_richness) <- FG_richness[,1]
x_sub <- subset_taxa(ps_FG, trophicMode %in% c("Pathotroph"))
x_sub
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1500 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 27 sample variables ]
## tax_table() Taxonomy Table: [ 1500 taxa by 11 taxonomic ranks ]
# 1500 taxa and 140 samples
# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")
# get the data
richness_df <- richness$data
# the observed richness is in the column "value", lets change this to pathotroph_richness
colnames(richness_df)[30] <- "pathotroph_richness"
# and lets remove extra columns
richness_df <- richness_df[, c(1, 30)]
# combine
FG_richness <- dplyr::left_join(FG_richness, richness_df, by = "sampleID")
# sampleID into rownames
rownames(FG_richness) <- FG_richness[,1]
Note! AMF richness calculated from FUNGuild is exactly the same (not shown here)!
x_sub <- subset_taxa(ps, phylum %in% c("Glomeromycota"))
x_sub
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 263 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 263 taxa by 7 taxonomic ranks ]
# 263 taxa and 140 samples
# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")
# get the data
richness_df <- richness$data
# the observed richness is in the column "value", lets change this to AMF_richness
colnames(richness_df)[31] <- "AMF_richness"
# and lets remove extra columns
richness_df <- richness_df[, c(1, 31)]
# combine
FG_richness <- dplyr::left_join(FG_richness, richness_df, by = "sampleID")
# sampleID into rownames
rownames(FG_richness) <- FG_richness[,1]
# I actually want the meta data from the ps rather than the ps_FG, so leyt's remove extra columns
FG_richness <- FG_richness[, c(1, 28:31)]
meta <- dplyr::left_join(meta, FG_richness, by = "sampleID")
# sampleID into rownames
rownames(meta) <- meta[,1]
sample_data(ps) <- sample_data(meta)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
save(ps, file = 'ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
Note! The CLD letters that will be added to the fiugures are based on statistical test results done at step 6.
# create your own color palette for sample types
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
# OTU richness
OTU_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(observed, na.rm = TRUE), se = (sd(observed, na.rm = TRUE)/sqrt(length((observed))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Fungal richness") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
print(OTU_rich)
Change to thousands of reads (x1000)
rich_k <- meta %>% dplyr::mutate(richness_k = observed/1000 ) %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(richness_k, na.rm = TRUE), se = (sd(richness_k, na.rm = TRUE)/sqrt(length((richness_k))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Fungal richness \n (x1000)") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
rich_k
rich_k <- meta %>% dplyr::mutate(richness_k = observed/1000 ) %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(richness_k, na.rm = TRUE), se = (sd(richness_k, na.rm = TRUE)/sqrt(length((richness_k))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Fungal richness \n (x1000)") +
labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
rich_k <- rich_k + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm"))
rich_k <- rich_k + coord_flip()
rich_k
rich_k <- meta %>% dplyr::mutate(richness_k = observed/1000 ) %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(richness_k, na.rm = TRUE), se = (sd(richness_k, na.rm = TRUE)/sqrt(length((richness_k))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Fungal richness \n (x1000)") +
labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
rich_k <- rich_k + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm"))
rich_k <- rich_k +
annotate("text", x=1.5, y=1.5, label= "b", col="#1167b1", size=3) +
annotate("text", x=1.5, y=1.1, label= "a", col="#fbc02d", size=3) +
annotate("text", x=1.5, y=3, label= "c", col="#8a8a8a", size=3) +
annotate("text", x=1.5, y=2.6, label= "c", col="#b71c1c", size=3) +
annotate("text", x=11, y=1.4, label= "a", col="#1167b1", size=3) +
annotate("text", x=11, y=2.0, label= "ab", col="#fbc02d", size=3) +
annotate("text", x=11, y=2.55, label= "ab", col="#8a8a8a", size=3) +
annotate("text", x=11, y=3.0, label= "b", col="#b71c1c", size=3) +
annotate("text", x=25, y=0.2, label= "(ns)", size=3) +
annotate("text", x=35, y=1.2, label= "(ns)", size=3) +
annotate("text", x=59, y=0.7, label= "ab", col="#1167b1", size=3) +
annotate("text", x=59, y=1.2, label= "ab", col="#fbc02d", size=3) +
annotate("text", x=59, y=1.45, label= "b", col="#8a8a8a", size=3) +
annotate("text", x=59, y=0.95, label= "a", col="#b71c1c", size=3)
rich_k <- rich_k + coord_flip()
rich_k
gm_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(AMF_richness, na.rm = TRUE), se = (sd(AMF_richness, na.rm = TRUE)/sqrt(length((AMF_richness))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="AMF richness \n") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
gm_rich
gm_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(AMF_richness, na.rm = TRUE), se = (sd(AMF_richness, na.rm = TRUE)/sqrt(length((AMF_richness))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="AMF richness \n") +
labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
gm_rich <- gm_rich + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm"))
gm_rich <- gm_rich +
annotate("text", x=1.5, y=7, label= "a", col="#1167b1", size=3) +
annotate("text", x=1.5, y=12, label= "ab", col="#fbc02d", size=3) +
annotate("text", x=1.5, y=17, label= "ab", col="#8a8a8a", size=3) +
annotate("text", x=1.5, y=22, label= "b", col="#b71c1c", size=3) +
annotate("text", x=11, y=11, label= "a", col="#1167b1", size=3) +
annotate("text", x=11, y=30, label= "b", col="#fbc02d", size=3) +
annotate("text", x=11, y=23, label= "ab", col="#8a8a8a", size=3) +
annotate("text", x=11, y=27, label= "b", col="#b71c1c", size=3) +
annotate("text", x=21, y=9, label= "a", col="#1167b1", size=3) +
annotate("text", x=21, y=37, label= "b", col="#fbc02d", size=3) +
annotate("text", x=21, y=27, label= "ab", col="#8a8a8a", size=3) +
annotate("text", x=21, y=20, label= "a", col="#b71c1c", size=3) +
annotate("text", x=31, y=8, label= "ab", col="#1167b1", size=3) +
annotate("text", x=31, y=34, label= "b", col="#fbc02d", size=3) +
annotate("text", x=31, y=17, label= "ab", col="#8a8a8a", size=3) +
annotate("text", x=31, y=4, label= "a", col="#b71c1c", size=3) +
annotate("text", x=59, y=13, label= "(ns)", size=3)
gm_rich <- gm_rich + coord_flip()
gm_rich
Saprotroph_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(saprotroph_richness, na.rm = TRUE), se = (sd(saprotroph_richness, na.rm = TRUE)/sqrt(length((saprotroph_richness))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Saprotroph richness \n") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
Saprotroph_rich
Saprotroph_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(saprotroph_richness, na.rm = TRUE), se = (sd(saprotroph_richness, na.rm = TRUE)/sqrt(length((saprotroph_richness))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Saprotroph richness \n") +
labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
Saprotroph_rich <- Saprotroph_rich + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm"))
Saprotroph_rich <- Saprotroph_rich +
annotate("text", x=1.5, y=450, label= "a", col="#1167b1", size=3) +
annotate("text", x=1.5, y=550, label= "a", col="#fbc02d", size=3) +
annotate("text", x=1.5, y=830, label= "b", col="#8a8a8a", size=3) +
annotate("text", x=1.5, y=690, label= "ab", col="#b71c1c", size=3) +
annotate("text", x=11, y=490, label= "a", col="#1167b1", size=3) +
annotate("text", x=11, y=900, label= "b", col="#fbc02d", size=3) +
annotate("text", x=11, y=680, label= "ab", col="#8a8a8a", size=3) +
annotate("text", x=11, y=800, label= "ab", col="#b71c1c", size=3) +
annotate("text", x=21, y=220, label= "a", col="#1167b1", size=3) +
annotate("text", x=21, y=770, label= "b", col="#fbc02d", size=3) +
annotate("text", x=21, y=600, label= "ab", col="#8a8a8a", size=3) +
annotate("text", x=21, y=450, label= "ab", col="#b71c1c", size=3) +
annotate("text", x=35, y=450, label= "(ns)", size=3) +
annotate("text", x=59, y=200, label= "(ns)", size=3)
Saprotroph_rich <- Saprotroph_rich + coord_flip()
Saprotroph_rich
symb_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(symbiotroph_richness, na.rm = TRUE), se = (sd(symbiotroph_richness, na.rm = TRUE)/sqrt(length((symbiotroph_richness))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Symbiotroph richness \n") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
symb_rich
symb_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(symbiotroph_richness, na.rm = TRUE), se = (sd(symbiotroph_richness, na.rm = TRUE)/sqrt(length((symbiotroph_richness))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Symbiotroph richness \n") +
labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
symb_rich <- symb_rich + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm"))
symb_rich <- symb_rich + coord_flip()
symb_rich
path_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(pathotroph_richness, na.rm = TRUE), se = (sd(pathotroph_richness, na.rm = TRUE)/sqrt(length((pathotroph_richness))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Pathotroph richness \n") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
path_rich
path_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(pathotroph_richness, na.rm = TRUE), se = (sd(pathotroph_richness, na.rm = TRUE)/sqrt(length((pathotroph_richness))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Pathotroph richness \n") +
labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
path_rich <- path_rich + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm"))
path_rich <- path_rich +
annotate("text", x=1.5, y=90, label= "a", col="#1167b1", size=3) +
annotate("text", x=1.5, y=60, label= "a", col="#fbc02d", size=3) +
annotate("text", x=1.5, y=310, label= "b", col="#8a8a8a", size=3) +
annotate("text", x=1.5, y=330, label= "b", col="#b71c1c", size=3) +
annotate("text", x=11, y=56, label= "a", col="#1167b1", size=3) +
annotate("text", x=11, y=118, label= "a", col="#fbc02d", size=3) +
annotate("text", x=11, y=275, label= "b", col="#8a8a8a", size=3) +
annotate("text", x=11, y=331, label= "b", col="#b71c1c", size=3) +
annotate("text", x=25, y=250, label= "(ns)", size=3) +
annotate("text", x=35, y=70, label= "(ns)", size=3) +
annotate("text", x=59, y=50, label= "(ns)", size=3)
path_rich <- path_rich + coord_flip()
path_rich
figure <- ggarrange(rich_k, gm_rich + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), Saprotroph_rich + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), path_rich + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"),
labels = c("A", "B", "C", "D"),
ncol = 4, nrow = 1, common.legend = TRUE, legend = "right", widths = c(1.25, 0.9, 0.9, 0.9))
figure
library("multcomp")
Change depth and richness measure accordingly
Test these: - observed - AMF_richness - saprotroph_richness - pathotroph_richness
library(car)
means_and_ses <- list()
meta$depth <- as.factor(meta$depth)
rich <-c("observed", "AMF_richness", "saprotroph_richness", "pathotroph_richness")
for (i in rich) {
for (j in levels(meta$depth)) {
df <- filter(meta, depth==j)
print(i)
print(j)
# Levene's test for homogeneity of variance
result <- leveneTest(df[[i]] ~ sample_type, data = df)
print(result)
# Kruskal-Wallis test
k <- kruskal.test(df[[i]] ~ sample_type, data = df)
print(k)
# Pairwise Wilcoxon test
w <- pairwise.wilcox.test(df[[i]], df$sample_type, p.adjust.method = "BH")
print(w)
# ANOVA
res.aov <- aov(df[[i]] ~ sample_type, data = df)
aov_summary <- summary(res.aov)
print(aov_summary)
# Tukey's HSD test
tukey <- TukeyHSD(res.aov)
print(tukey)
# Group by and summarize
mean_and_se <- df %>%
group_by(sample_type) %>%
summarise(mean = mean(!!sym(i), na.rm = TRUE), se = sd(!!sym(i), na.rm = TRUE) / sqrt(n()))
# Store the result in the list with a descriptive name
result_name <- paste("depth", j, "diversity", i, sep = "_")
means_and_ses[[result_name]] <- mean_and_se
}
}
## [1] "observed"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.0589 0.3849
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 17.971, df = 3, p-value = 0.0004458
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.59636 - -
## organic 0.03636 0.00093 -
## conventional 0.03636 0.00093 0.67297
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 17009022 5669674 16.24 5.61e-06 ***
## Residuals 24 8379531 349147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -345.2917 -1448.8240 758.2407 0.8235682
## organic-forest 1417.2083 313.6760 2520.7407 0.0084047
## conventional-forest 1246.1111 159.4276 2332.7947 0.0204125
## organic-meadow 1762.5000 947.4873 2577.5127 0.0000209
## conventional-meadow 1591.4028 799.3528 2383.4527 0.0000593
## conventional-organic -171.0972 -963.1472 620.9527 0.9323217
##
## [1] "observed"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.6778 0.1983
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 9.1193, df = 3, p-value = 0.02775
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.200 - -
## organic 0.068 0.459 -
## conventional 0.068 0.068 0.541
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 4612134 1537378 4.074 0.0179 *
## Residuals 24 9056485 377354
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 611.9167 -535.3253 1759.159 0.4695196
## organic-forest 1043.5417 -103.7003 2190.784 0.0838551
## conventional-forest 1288.4444 158.7186 2418.170 0.0212276
## organic-meadow 431.6250 -415.6694 1278.919 0.5083506
## conventional-meadow 676.5278 -146.8943 1499.950 0.1343587
## conventional-organic 244.9028 -578.5193 1068.325 0.8442175
##
## [1] "observed"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.0857 0.1287
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 4.6562, df = 3, p-value = 0.1988
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.15 - -
## organic 0.25 0.96 -
## conventional 0.42 0.96 0.72
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 4742212 1580737 1.755 0.183
## Residuals 24 21619303 900804
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 1334.2083 -438.3317 3106.7484 0.1894002
## organic-forest 1381.5833 -390.9567 3154.1234 0.1662604
## conventional-forest 1032.6667 -712.8101 2778.1435 0.3804286
## organic-meadow 47.3750 -1261.7326 1356.4826 0.9996332
## conventional-meadow -301.5417 -1573.7656 970.6822 0.9131616
## conventional-organic -348.9167 -1621.1406 923.3072 0.8729624
##
## [1] "observed"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.6803 0.06959 .
## 24
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 3.7523, df = 3, p-value = 0.2895
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.46 - -
## organic 0.46 0.46 -
## conventional 0.46 0.46 0.67
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 986615 328872 1.716 0.19
## Residuals 24 4599845 191660
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 624.41667 -193.1945 1442.0279 0.1795937
## organic-forest 289.91667 -527.6945 1107.5279 0.7630092
## conventional-forest 342.44444 -462.6834 1147.5723 0.6489479
## organic-meadow -334.50000 -938.3459 269.3459 0.4370977
## conventional-meadow -281.97222 -868.8049 304.8605 0.5563050
## conventional-organic 52.52778 -534.3049 639.3605 0.9945665
##
## [1] "observed"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.3401 0.2848
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 10.863, df = 3, p-value = 0.01249
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.153 - -
## organic 0.056 0.056 -
## conventional 0.175 0.963 0.056
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 407159 135720 4.089 0.0177 *
## Residuals 24 796553 33190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 114.75000 -225.487851 454.98785 0.7889685
## organic-forest 337.87500 -2.362851 678.11285 0.0520810
## conventional-forest 77.11111 -257.931964 412.15419 0.9197120
## organic-meadow 223.12500 -28.157306 474.40731 0.0944700
## conventional-meadow -37.63889 -281.841396 206.56362 0.9736063
## conventional-organic -260.76389 -504.966396 -16.56138 0.0333018
##
## [1] "AMF_richness"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.4636 0.7103
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 8.1971, df = 3, p-value = 0.04211
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.33 - -
## organic 0.13 0.56 -
## conventional 0.13 0.13 0.15
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 664.5 221.5 3.238 0.0398 *
## Residuals 24 1641.6 68.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 6.708333 -8.7373996 22.15407 0.6339029
## organic-forest 9.083333 -6.3623996 24.52907 0.3855925
## conventional-forest 15.555556 0.3456492 30.76546 0.0437049
## organic-meadow 2.375000 -9.0324298 13.78243 0.9387885
## conventional-meadow 8.847222 -2.2388068 19.93325 0.1515032
## conventional-organic 6.472222 -4.6138068 17.55825 0.3918515
##
## [1] "AMF_richness"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.1141 0.951
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 7.4387, df = 3, p-value = 0.05916
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.076 - -
## organic 0.170 0.442 -
## conventional 0.076 0.699 0.433
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 894 298.00 4.384 0.0135 *
## Residuals 24 1631 67.97
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 18.5833333 3.186290 33.98038 0.0139049
## organic-forest 12.4583333 -2.938710 27.85538 0.1432253
## conventional-forest 17.7777778 2.615818 32.93974 0.0173327
## organic-meadow -6.1250000 -17.496470 5.24647 0.4611598
## conventional-meadow -0.8055556 -11.856638 10.24553 0.9970402
## conventional-organic 5.3194444 -5.731638 16.37053 0.5548799
##
## [1] "AMF_richness"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.1721 0.1176
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 10.239, df = 3, p-value = 0.01664
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.056 - -
## organic 0.056 0.154 -
## conventional 0.404 0.068 0.402
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 2273 757.8 4.859 0.00883 **
## Residuals 24 3743 156.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 28.708333 5.385511 52.0311555 0.0119094
## organic-forest 18.083333 -5.239489 41.4061555 0.1696484
## conventional-forest 11.222222 -11.744505 34.1889499 0.5427300
## organic-meadow -10.625000 -27.850046 6.6000457 0.3447892
## conventional-meadow -17.486111 -34.225847 -0.7463756 0.0383380
## conventional-organic -6.861111 -23.600847 9.8786244 0.6746486
##
## [1] "AMF_richness"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.2858 0.1044
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 9.515, df = 3, p-value = 0.02317
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.073 - -
## organic 0.643 0.228 -
## conventional 0.926 0.023 0.643
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 3828 1275.9 5.884 0.00369 **
## Residuals 24 5204 216.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 26.875000 -0.6265144 54.376514 0.0571204
## organic-forest 10.375000 -17.1265144 37.876514 0.7276472
## conventional-forest -1.777778 -28.8593971 25.303842 0.9978313
## organic-meadow -16.500000 -36.8112145 3.811214 0.1408901
## conventional-meadow -28.652778 -48.3917302 -8.913825 0.0027413
## conventional-organic -12.152778 -31.8917302 7.586175 0.3463849
##
## [1] "AMF_richness"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.591 0.2176
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 4.4541, df = 3, p-value = 0.2164
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.36 - -
## organic 0.36 0.83 -
## conventional 0.51 0.36 0.36
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 267.9 89.31 1.488 0.243
## Residuals 24 1440.9 60.04
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 6.0416667 -8.429273 20.512607 0.6619918
## organic-forest 7.4166667 -7.054273 21.887607 0.5032636
## conventional-forest 0.7777778 -13.472219 15.027774 0.9987481
## organic-meadow 1.3750000 -9.312497 12.062497 0.9842975
## conventional-meadow -5.2638889 -15.650269 5.122492 0.5126347
## conventional-organic -6.6388889 -17.025269 3.747492 0.3148737
##
## [1] "saprotroph_richness"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.236 0.3185
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 10.335, df = 3, p-value = 0.01592
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.921 - -
## organic 0.170 0.046 -
## conventional 0.283 0.046 0.283
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 552978 184326 5.106 0.00711 **
## Residuals 24 866367 36099
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 22.66667 -332.1680785 377.5014 0.9980002
## organic-forest 354.66667 -0.1680785 709.5014 0.0501395
## conventional-forest 219.11111 -130.3059916 568.5282 0.3308818
## organic-meadow 332.00000 69.9371730 594.0628 0.0094204
## conventional-meadow 196.44444 -58.2348452 451.1237 0.1730235
## conventional-organic -135.55556 -390.2348452 119.1237 0.4713122
##
## [1] "saprotroph_richness"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.3935 0.2689
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 8.4841, df = 3, p-value = 0.037
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.073 - -
## organic 0.170 0.193 -
## conventional 0.073 0.185 0.888
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 368992 122997 3.91 0.0209 *
## Residuals 24 754986 31458
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 405.16667 73.92528 736.40806 0.0125214
## organic-forest 249.79167 -81.44972 581.03306 0.1881316
## conventional-forest 260.11111 -66.07286 586.29508 0.1519732
## organic-meadow -155.37500 -400.01298 89.26298 0.3201964
## conventional-meadow -145.05556 -382.80094 92.68983 0.3540753
## conventional-organic 10.31944 -227.42594 248.06483 0.9993682
##
## [1] "saprotroph_richness"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.4484 0.2535
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 9.2572, df = 3, p-value = 0.02606
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.036 - -
## organic 0.267 0.292 -
## conventional 0.447 0.036 0.541
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 784367 261456 3.683 0.0259 *
## Residuals 24 1703653 70986
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 542.7083 45.12498 1040.29168 0.0289462
## organic-forest 358.2083 -139.37502 855.79168 0.2210832
## conventional-forest 230.3333 -259.65288 720.31955 0.5737676
## organic-meadow -184.5000 -551.98966 182.98966 0.5204628
## conventional-meadow -312.3750 -669.51075 44.76075 0.1015303
## conventional-organic -127.8750 -485.01075 229.26075 0.7576950
##
## [1] "saprotroph_richness"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.706 0.1924
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 6.5626, df = 3, p-value = 0.08723
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.388 - -
## organic 0.864 0.249 -
## conventional 0.864 0.091 0.482
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 192644 64215 3.255 0.0392 *
## Residuals 24 473414 19726
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 186.708333 -75.59003 449.006696 0.2294520
## organic-forest 2.083333 -260.21503 264.381696 0.9999961
## conventional-forest 5.111111 -253.18246 263.404684 0.9999398
## organic-meadow -184.625000 -378.34518 9.095179 0.0656538
## conventional-meadow -181.597222 -369.85940 6.664952 0.0614447
## conventional-organic 3.027778 -185.23440 191.289952 0.9999677
##
## [1] "saprotroph_richness"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.8324 0.4892
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 3.6785, df = 3, p-value = 0.2983
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.39 - -
## organic 0.39 0.76 -
## conventional 0.39 0.47 0.81
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 9406 3135 0.892 0.459
## Residuals 24 84343 3514
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 52.66667 -58.04632 163.37965 0.5643329
## organic-forest 47.29167 -63.42132 158.00465 0.6458956
## conventional-forest 20.00000 -89.02261 129.02261 0.9568518
## organic-meadow -5.37500 -87.14196 76.39196 0.9978224
## conventional-meadow -32.66667 -112.12987 46.79653 0.6726275
## conventional-organic -27.29167 -106.75487 52.17153 0.7798186
##
## [1] "pathotroph_richness"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 3.5826 0.02853 *
## 24
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 19.378, df = 3, p-value = 0.0002283
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.93091 - -
## organic 0.01818 0.00047 -
## conventional 0.01818 0.00047 1.00000
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 404412 134804 25.49 1.23e-07 ***
## Residuals 24 126933 5289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -11.041667 -146.86103 124.7777 0.9959110
## organic-forest 234.833333 99.01397 370.6527 0.0004079
## conventional-forest 240.666667 106.92100 374.4123 0.0002508
## organic-meadow 245.875000 145.56575 346.1843 0.0000031
## conventional-meadow 251.708333 154.22526 349.1914 0.0000013
## conventional-organic 5.833333 -91.64974 103.3164 0.9983535
##
## [1] "pathotroph_richness"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.2339 0.1102
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 21.058, df = 3, p-value = 0.0001024
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.02909 - -
## organic 0.01818 0.00047 -
## conventional 0.01818 0.00047 0.13879
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 298602 99534 29.77 2.95e-08 ***
## Residuals 24 80239 3343
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 61.62500 -46.36138 169.6114 0.4114727
## organic-forest 219.25000 111.26362 327.2364 0.0000513
## conventional-forest 274.66667 168.32903 381.0043 0.0000013
## organic-meadow 157.62500 77.87177 237.3782 0.0000742
## conventional-meadow 213.04167 135.53546 290.5479 0.0000005
## conventional-organic 55.41667 -22.08954 132.9229 0.2261312
##
## [1] "pathotroph_richness"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.2613 0.1071
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 8.05, df = 3, p-value = 0.04499
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.073 - -
## organic 0.073 0.157 -
## conventional 0.104 0.386 0.888
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 93092 31031 2.547 0.0797 .
## Residuals 24 292356 12182
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 75.58333 -130.54194 281.7086 0.7443125
## organic-forest 171.70833 -34.41694 377.8336 0.1266336
## conventional-forest 157.66667 -45.31147 360.6448 0.1684675
## organic-meadow 96.12500 -56.10861 248.3586 0.3250720
## conventional-meadow 82.08333 -65.86114 230.0278 0.4357382
## conventional-organic -14.04167 -161.98614 133.9028 0.9935429
##
## [1] "pathotroph_richness"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.1511 0.3488
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 5.8552, df = 3, p-value = 0.1189
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.18 - -
## organic 0.49 0.75 -
## conventional 0.18 0.18 0.47
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 1589 529.8 1.81 0.172
## Residuals 24 7024 292.7
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 11.208333 -20.742112 43.15878 0.7687688
## organic-forest 15.958333 -15.992112 47.90878 0.5247314
## conventional-forest 24.444444 -7.018179 55.90707 0.1683269
## organic-meadow 4.750000 -18.846968 28.34697 0.9441916
## conventional-meadow 13.236111 -9.696019 36.16824 0.4016916
## conventional-organic 8.486111 -14.446019 31.41824 0.7390336
##
## [1] "pathotroph_richness"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.33 0.2879
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 7.1979, df = 3, p-value = 0.06585
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.083 - -
## organic 0.083 0.665 -
## conventional 0.083 0.727 0.665
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 114.1 38.03 1.368 0.276
## Residuals 24 666.9 27.79
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 4.166667 -5.678021 14.011355 0.6524360
## organic-forest 6.166667 -3.678021 16.011355 0.3317956
## conventional-forest 2.111111 -7.583267 11.805490 0.9308262
## organic-meadow 2.000000 -5.270784 9.270784 0.8719962
## conventional-meadow -2.055556 -9.121487 5.010376 0.8525672
## conventional-organic -4.055556 -11.121487 3.010376 0.4065242
# Combine list into a single dataframe
combined_df <- bind_rows(lapply(names(means_and_ses), function(name) {
df <- means_and_ses[[name]]
df$result_name <- name
return(df)
}), .id = "id")
# View the combined dataframe
print(combined_df)
## # A tibble: 80 × 5
## id sample_type mean se result_name
## <chr> <fct> <dbl> <dbl> <chr>
## 1 1 forest 1478. 438. depth_0...10_diversity_observed
## 2 1 meadow 1132. 158. depth_0...10_diversity_observed
## 3 1 organic 2895. 242. depth_0...10_diversity_observed
## 4 1 conventional 2724. 188. depth_0...10_diversity_observed
## 5 2 forest 1608. 93.0 depth_10...20_diversity_observed
## 6 2 meadow 2220. 219. depth_10...20_diversity_observed
## 7 2 organic 2652. 265. depth_10...20_diversity_observed
## 8 2 conventional 2897. 181. depth_10...20_diversity_observed
## 9 3 forest 792. 249. depth_20...30_diversity_observed
## 10 3 meadow 2126. 226. depth_20...30_diversity_observed
## # ℹ 70 more rows
# save the mean and se values
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
#write.csv2(combined_df, file = "Richness_mean_and_ses.csv")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA <- transform(ps, "compositional")
meta <- meta(ps)
ps_RA_x <- aggregate_rare(ps_RA, level = "phylum", detection = 0, prevalence = 0)
ps_RA_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 14 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 14 taxa by 1 taxonomic ranks ]
# 14 taxa and 140 samples
#create data table
x_df <- psmelt(ps_RA_x)
x_df$year <- "2019"
x <- x_df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Ascomycota 0.661 0.0163
## 2 2019 Basidiomycota 0.235 0.0159
## 3 2019 Mortierellomycota 0.0759 0.00873
## 4 2019 Glomeromycota 0.0240 0.00613
## 5 2019 Rozellomycota 0.00228 0.00109
library(car)
taxa <- y$OTU
for (i in taxa) {
df <- filter(x_df, OTU == i)
print(i)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
# perform the Kruskal test
k <- kruskal.test(Abundance ~ sample_type, data = df)
print(k)
w <- pairwise.wilcox.test(df$Abundance, df$sample_type,
p.adjust.method = "BH")
print(w)
}
## [1] "Ascomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.5105 0.2146
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 27.535, df = 3, p-value = 4.546e-06
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 5.9e-05 - -
## organic 2.2e-07 0.42 -
## conventional 1.4e-07 0.35 0.70
##
## P value adjustment method: BH
## [1] "Basidiomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.2681 0.2879
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 26.818, df = 3, p-value = 6.428e-06
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 1.1e-05 - -
## organic 1.1e-05 0.52 -
## conventional 2.2e-07 0.48 0.21
##
## P value adjustment method: BH
## [1] "Mortierellomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.6242 0.1867
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 7.126, df = 3, p-value = 0.06799
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.902 - -
## organic 0.902 0.902 -
## conventional 0.180 0.180 0.076
##
## P value adjustment method: BH
## [1] "Glomeromycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.0182 0.3867
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 5.8342, df = 3, p-value = 0.12
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.14 - -
## organic 0.27 0.36 -
## conventional 0.30 0.27 0.91
##
## P value adjustment method: BH
## [1] "Rozellomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 4.5426 0.004562 **
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 11.375, df = 3, p-value = 0.00986
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.120 - -
## organic 0.058 0.120 -
## conventional 0.436 0.120 0.041
##
## P value adjustment method: BH
x <- x_df %>%
filter(OTU %in% taxa) %>% group_by(OTU, sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 20 × 4
## # Groups: OTU [5]
## OTU sample_type mean se
## <chr> <fct> <dbl> <dbl>
## 1 Ascomycota forest 0.402 0.0486
## 2 Ascomycota meadow 0.670 0.0293
## 3 Ascomycota organic 0.692 0.0264
## 4 Ascomycota conventional 0.711 0.0231
## 5 Basidiomycota forest 0.533 0.0555
## 6 Basidiomycota meadow 0.210 0.0261
## 7 Basidiomycota organic 0.220 0.0247
## 8 Basidiomycota conventional 0.170 0.0191
## 9 Glomeromycota forest 0.00368 0.00177
## 10 Glomeromycota meadow 0.0325 0.0121
## 11 Glomeromycota organic 0.0333 0.0158
## 12 Glomeromycota conventional 0.0149 0.00690
## 13 Mortierellomycota forest 0.0470 0.0158
## 14 Mortierellomycota meadow 0.0860 0.0195
## 15 Mortierellomycota organic 0.0522 0.0120
## 16 Mortierellomycota conventional 0.0975 0.0167
## 17 Rozellomycota forest 0.0131 0.00974
## 18 Rozellomycota meadow 0.000315 0.0000708
## 19 Rozellomycota organic 0.000163 0.0000405
## 20 Rozellomycota conventional 0.00228 0.000714
x_df_nf <- subset(x_df, sample_type!="forest")
x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"
x <- x_df_nf %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Ascomycota 0.692 0.0151
## 2 2019 Basidiomycota 0.199 0.0134
## 3 2019 Mortierellomycota 0.0793 0.00956
## 4 2019 Glomeromycota 0.0264 0.00683
## 5 2019 Chytridiomycota 0.00209 0.000362
taxa <- y$OTU
x <- x_df_nf %>%
filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups: OTU [5]
## OTU depth mean se
## <chr> <fct> <dbl> <dbl>
## 1 Ascomycota 0...10 0.702 0.0227
## 2 Ascomycota 10...20 0.682 0.0190
## 3 Ascomycota 20...30 0.696 0.0207
## 4 Ascomycota 30...40 0.627 0.0486
## 5 Ascomycota 40... 0.752 0.0431
## 6 Basidiomycota 0...10 0.265 0.0198
## 7 Basidiomycota 10...20 0.283 0.0188
## 8 Basidiomycota 20...30 0.196 0.0248
## 9 Basidiomycota 30...40 0.114 0.0320
## 10 Basidiomycota 40... 0.136 0.0364
## # ℹ 15 more rows
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
#write.csv2(x, file = "5_Phyla_in_soil_layers_WITHOUT_forest_mean.csv")
library(car)
library("rcompanion")
library("multcompView")
taxa <- y$OTU
for (i in taxa) {
df <- subset(x_df_nf, OTU==i)
# Using leveneTest()
print(i)
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)
# perform the Kruskal test
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)
wilcox.res <- pairwise.wilcox.test(df$Abundance, df$depth,
p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)
mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
}
## [1] "Ascomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 7.2906 2.73e-05 ***
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 4.2424, df = 4, p-value = 0.3742
##
## 0...10 10...20 20...30 30...40
## 10...20 0.6802454 NA NA NA
## 20...30 0.8626083 0.6802454 NA NA
## 30...40 0.6802454 0.8626083 0.7375178 NA
## 40... 0.6574517 0.6574517 0.6574517 0.6212054
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "a" "a"
## [1] "Basidiomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 1.0875 0.3659
## 120
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 38.942, df = 4, p-value = 7.161e-08
##
## 0...10 10...20 20...30 30...40
## 10...20 6.119242e-01 NA NA NA
## 20...30 3.091939e-02 4.118506e-03 NA NA
## 30...40 2.260766e-06 2.260766e-06 0.002588052 NA
## 40... 1.622194e-03 5.646255e-04 0.025631670 0.6721398
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "b" "c" "c"
## [1] "Mortierellomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 5.562 0.0003841 ***
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 39.225, df = 4, p-value = 6.259e-08
##
## 0...10 10...20 20...30 30...40
## 10...20 8.866322e-02 NA NA NA
## 20...30 3.972787e-05 3.234148e-04 NA NA
## 30...40 1.302516e-06 1.302516e-06 0.02136213 NA
## 40... 8.777488e-01 7.468220e-01 0.04799830 0.0008671149
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "b" "c" "a"
## [1] "Glomeromycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 4.9745 0.0009581 ***
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 23.482, df = 4, p-value = 0.0001014
##
## 0...10 10...20 20...30 30...40
## 10...20 9.666451e-03 NA NA NA
## 20...30 3.425034e-05 0.009666451 NA NA
## 30...40 8.343651e-04 0.003236620 0.04959274 NA
## 40... 5.593752e-01 0.510709727 0.33419998 0.1147993
## 0...10 10...20 20...30 30...40 40...
## "a" "b" "c" "d" "abcd"
## [1] "Chytridiomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 4.0054 0.004377 **
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 84.286, df = 4, p-value < 2.2e-16
##
## 0...10 10...20 20...30 30...40
## 10...20 5.507318e-01 NA NA NA
## 20...30 3.796944e-01 2.030024e-01 NA NA
## 30...40 1.071611e-08 1.071611e-08 1.991630e-07 NA
## 40... 6.881199e-10 6.881199e-10 7.192628e-09 0.1413528
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "b" "b"
taxa <- "Glomeromycota"
# Construct a data.frame with the selected taxonomic group
df <- filter(x_df_nf, OTU == taxa & depth_numerical > 40)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 1.1016 0.35
## 22
x <- df %>%
group_by(sample_type) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 3 × 3
## sample_type mean se
## <fct> <dbl> <dbl>
## 1 meadow 0.0779 0.0511
## 2 organic 0.0404 0.0179
## 3 conventional 0.0165 0.0139
# one-way ANOVA
res.aov <- aov(Abundance ~ sample_type, data = df)
# Summary of the analysis
print(summary(res.aov))
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 2 0.0161 0.008049 0.992 0.387
## Residuals 22 0.1785 0.008113
x_df_nf <- subset(x_df, sample_type=="forest")
x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"
x <- x_df_nf %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Basidiomycota 0.533 0.0555
## 2 2019 Ascomycota 0.402 0.0486
## 3 2019 Mortierellomycota 0.0470 0.0158
## 4 2019 Rozellomycota 0.0131 0.00974
## 5 2019 Glomeromycota 0.00368 0.00177
taxa <- y$OTU
x <- x_df_nf %>%
filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups: OTU [5]
## OTU depth mean se
## <chr> <fct> <dbl> <dbl>
## 1 Ascomycota 0...10 0.478 0.0839
## 2 Ascomycota 10...20 0.516 0.0422
## 3 Ascomycota 20...30 0.274 0.160
## 4 Ascomycota 30...40 0.407 0.129
## 5 Ascomycota 40... 0.336 0.109
## 6 Basidiomycota 0...10 0.449 0.121
## 7 Basidiomycota 10...20 0.457 0.0382
## 8 Basidiomycota 20...30 0.597 0.213
## 9 Basidiomycota 30...40 0.510 0.149
## 10 Basidiomycota 40... 0.649 0.0941
## # ℹ 15 more rows
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
#write.csv2(x, file = "5_Phyla_in_soil_layers_ONLY_forest_mean.csv")
taxa <- "Glomeromycota"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 3.1178 0.06588 .
## 10
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
kruskal.test(Abundance ~ depth, data = df)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 7.3861, df = 4, p-value = 0.1168
ps_RA_x <- aggregate_rare(ps_RA, level = "class", detection = 0, prevalence = 0)
ps_RA_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 68 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 68 taxa by 1 taxonomic ranks ]
# 68 taxa and 140 samples
#create data table
x_df <- psmelt(ps_RA_x)
x_df$year <- "2019"
x <- x_df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Leotiomycetes 0.282 0.0200
## 2 2019 Sordariomycetes 0.169 0.0124
## 3 2019 Dothideomycetes 0.127 0.0128
## 4 2019 Tremellomycetes 0.126 0.0105
## 5 2019 Agaricomycetes 0.0884 0.0145
taxa <- y$OTU
x <- x_df %>%
filter(OTU %in% taxa) %>% group_by(OTU, sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 20 × 4
## # Groups: OTU [5]
## OTU sample_type mean se
## <chr> <fct> <dbl> <dbl>
## 1 Agaricomycetes forest 0.448 0.0653
## 2 Agaricomycetes meadow 0.0583 0.00947
## 3 Agaricomycetes organic 0.0592 0.0217
## 4 Agaricomycetes conventional 0.0215 0.00727
## 5 Dothideomycetes forest 0.0501 0.0130
## 6 Dothideomycetes meadow 0.189 0.0351
## 7 Dothideomycetes organic 0.109 0.0178
## 8 Dothideomycetes conventional 0.115 0.0151
## 9 Leotiomycetes forest 0.202 0.0354
## 10 Leotiomycetes meadow 0.301 0.0367
## 11 Leotiomycetes organic 0.270 0.0398
## 12 Leotiomycetes conventional 0.302 0.0375
## 13 Sordariomycetes forest 0.0309 0.00929
## 14 Sordariomycetes meadow 0.0874 0.0157
## 15 Sordariomycetes organic 0.239 0.0229
## 16 Sordariomycetes conventional 0.225 0.0210
## 17 Tremellomycetes forest 0.0757 0.0285
## 18 Tremellomycetes meadow 0.139 0.0215
## 19 Tremellomycetes organic 0.148 0.0209
## 20 Tremellomycetes conventional 0.110 0.0157
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
#write.csv2(x, file = "5_Class_in_soil_layers_All_Management.csv")
taxa <- y$OTU
for (i in taxa) {
# Construct a data.frame with the selected taxonomic group
df <- filter(x_df, OTU == i)
print(i)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
# perform the Kruskal test
k <- kruskal.test(Abundance ~ sample_type, data = df)
print(k)
w <- pairwise.wilcox.test(df$Abundance, df$sample_type,
p.adjust.method = "BH")
print(w)
}
## [1] "Leotiomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.6472 0.586
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 3.8991, df = 3, p-value = 0.2726
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.33 - -
## organic 0.61 0.33 -
## conventional 0.33 0.61 0.33
##
## P value adjustment method: BH
## [1] "Sordariomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 8.0545 5.591e-05 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 47.221, df = 3, p-value = 3.12e-10
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.0057 - -
## organic 4.9e-07 1.9e-06 -
## conventional 4.9e-07 1.9e-06 0.6393
##
## P value adjustment method: BH
## [1] "Dothideomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 5.7442 0.0009917 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 9.5392, df = 3, p-value = 0.02292
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.024 - -
## organic 0.134 0.134 -
## conventional 0.024 0.415 0.560
##
## P value adjustment method: BH
## [1] "Tremellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.4793 0.223
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 5.8864, df = 3, p-value = 0.1173
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.20 - -
## organic 0.20 0.71 -
## conventional 0.33 0.33 0.23
##
## P value adjustment method: BH
## [1] "Agaricomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 13.41 1.018e-07 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 44.002, df = 3, p-value = 1.508e-09
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 4.7e-08 - -
## organic 4.7e-08 0.05845 -
## conventional 4.7e-08 0.00015 0.52326
##
## P value adjustment method: BH
x <- x_df %>%
filter(OTU %in% taxa) %>% group_by(OTU, sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 20 × 4
## # Groups: OTU [5]
## OTU sample_type mean se
## <chr> <fct> <dbl> <dbl>
## 1 Agaricomycetes forest 0.448 0.0653
## 2 Agaricomycetes meadow 0.0583 0.00947
## 3 Agaricomycetes organic 0.0592 0.0217
## 4 Agaricomycetes conventional 0.0215 0.00727
## 5 Dothideomycetes forest 0.0501 0.0130
## 6 Dothideomycetes meadow 0.189 0.0351
## 7 Dothideomycetes organic 0.109 0.0178
## 8 Dothideomycetes conventional 0.115 0.0151
## 9 Leotiomycetes forest 0.202 0.0354
## 10 Leotiomycetes meadow 0.301 0.0367
## 11 Leotiomycetes organic 0.270 0.0398
## 12 Leotiomycetes conventional 0.302 0.0375
## 13 Sordariomycetes forest 0.0309 0.00929
## 14 Sordariomycetes meadow 0.0874 0.0157
## 15 Sordariomycetes organic 0.239 0.0229
## 16 Sordariomycetes conventional 0.225 0.0210
## 17 Tremellomycetes forest 0.0757 0.0285
## 18 Tremellomycetes meadow 0.139 0.0215
## 19 Tremellomycetes organic 0.148 0.0209
## 20 Tremellomycetes conventional 0.110 0.0157
x_df_nf <- subset(x_df, sample_type!="forest")
x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"
x <- x_df_nf %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Leotiomycetes 0.291 0.0218
## 2 2019 Sordariomycetes 0.185 0.0131
## 3 2019 Dothideomycetes 0.137 0.0140
## 4 2019 Tremellomycetes 0.132 0.0111
## 5 2019 Mortierellomycetes 0.0793 0.00956
taxa <- y$OTU
x <- x_df_nf %>%
filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups: OTU [5]
## OTU depth mean se
## <chr> <fct> <dbl> <dbl>
## 1 Dothideomycetes 0...10 0.296 0.0438
## 2 Dothideomycetes 10...20 0.193 0.0160
## 3 Dothideomycetes 20...30 0.0870 0.0164
## 4 Dothideomycetes 30...40 0.0515 0.0207
## 5 Dothideomycetes 40... 0.0548 0.0172
## 6 Leotiomycetes 0...10 0.115 0.0183
## 7 Leotiomycetes 10...20 0.150 0.0126
## 8 Leotiomycetes 20...30 0.310 0.0394
## 9 Leotiomycetes 30...40 0.422 0.0482
## 10 Leotiomycetes 40... 0.460 0.0626
## # ℹ 15 more rows
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
#write.csv2(x, file = "5_Classes_in_soil_layers_WITHOUT_forest_mean.csv")
taxa <- y$OTU
for (i in taxa) {
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==i)
print(i)
# Using leveneTest()
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)
# perform the Kruskal test
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)
wilcox.res <- pairwise.wilcox.test(df$Abundance, df$depth,
p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)
mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
}
## [1] "Leotiomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 14.875 6.712e-10 ***
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 43.951, df = 4, p-value = 6.568e-09
##
## 0...10 10...20 20...30 30...40
## 10...20 2.170595e-02 NA NA NA
## 20...30 1.819319e-05 3.105480e-03 NA NA
## 30...40 1.819319e-05 2.393976e-05 0.08486076 NA
## 40... 1.819319e-05 4.506015e-05 0.11043685 0.8626083
## 0...10 10...20 20...30 30...40 40...
## "a" "b" "c" "c" "c"
## [1] "Sordariomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 2.6173 0.03845 *
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 25.086, df = 4, p-value = 4.835e-05
##
## 0...10 10...20 20...30 30...40
## 10...20 3.753357e-01 NA NA NA
## 20...30 3.753357e-01 2.425450e-01 NA NA
## 30...40 8.223114e-05 8.223114e-05 0.0002982584 NA
## 40... 2.183687e-01 1.084402e-01 0.3753357254 0.1072279
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "b" "ab"
## [1] "Dothideomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 5.7029 0.0003088 ***
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 64.123, df = 4, p-value = 3.937e-13
##
## 0...10 10...20 20...30 30...40
## 10...20 3.510510e-01 NA NA NA
## 20...30 9.043065e-07 2.433622e-06 NA NA
## 30...40 4.846508e-08 5.869563e-08 0.0005249541 NA
## 40... 1.060943e-07 3.675922e-07 0.0151251757 0.5900142
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "b" "c" "c"
## [1] "Tremellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 4.5628 0.001825 **
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 72.482, df = 4, p-value = 6.787e-15
##
## 0...10 10...20 20...30 30...40
## 10...20 3.448406e-01 NA NA NA
## 20...30 3.257171e-02 1.715879e-03 NA NA
## 30...40 1.503038e-12 3.164291e-13 6.082530e-06 NA
## 40... 1.211627e-08 4.250065e-09 2.741038e-05 0.04327362
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "b" "c" "d"
## [1] "Mortierellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 5.562 0.0003841 ***
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 39.225, df = 4, p-value = 6.259e-08
##
## 0...10 10...20 20...30 30...40
## 10...20 8.866322e-02 NA NA NA
## 20...30 3.972787e-05 3.234148e-04 NA NA
## 30...40 1.302516e-06 1.302516e-06 0.02136213 NA
## 40... 8.777488e-01 7.468220e-01 0.04799830 0.0008671149
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "b" "c" "a"
x_df_nf <- subset(x_df, sample_type=="forest")
x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"
x <- x_df_nf %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Agaricomycetes 0.448 0.0653
## 2 2019 Leotiomycetes 0.202 0.0354
## 3 2019 Tremellomycetes 0.0757 0.0285
## 4 2019 Pezizomycetes 0.0520 0.0233
## 5 2019 Dothideomycetes 0.0501 0.0130
taxa <- y$OTU
x <- x_df_nf %>%
filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups: OTU [5]
## OTU depth mean se
## <chr> <fct> <dbl> <dbl>
## 1 Agaricomycetes 0...10 0.217 0.0933
## 2 Agaricomycetes 10...20 0.395 0.0287
## 3 Agaricomycetes 20...30 0.539 0.260
## 4 Agaricomycetes 30...40 0.507 0.151
## 5 Agaricomycetes 40... 0.578 0.0838
## 6 Dothideomycetes 0...10 0.0996 0.0324
## 7 Dothideomycetes 10...20 0.0913 0.00381
## 8 Dothideomycetes 20...30 0.0418 0.0285
## 9 Dothideomycetes 30...40 0.0121 0.0120
## 10 Dothideomycetes 40... 0.00578 0.00573
## # ℹ 15 more rows
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
#write.csv2(x, file = "5_Classes_in_soil_layers_ONLY_forest_mean.csv")
taxa <- y$OTU
for (i in taxa) {
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==i)
print(i)
# Using leveneTest()
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)
# perform the Kruskal test
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)
wilcox.res <- pairwise.wilcox.test(df$Abundance, df$depth,
p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)
mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
}
## [1] "Agaricomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 0.8107 0.5461
## 10
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 4.7, df = 4, p-value = 0.3195
##
## 0...10 10...20 20...30 30...40
## 10...20 1.0000000 NA NA NA
## 20...30 1.0000000 1.0 NA NA
## 30...40 0.6666667 1.0 1 NA
## 40... 0.5000000 0.5 1 1
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "a" "a"
## [1] "Leotiomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 0.2981 0.8726
## 10
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 2.0667, df = 4, p-value = 0.7235
##
## 0...10 10...20 20...30 30...40
## 10...20 0.7777778 NA NA NA
## 20...30 0.7777778 0.7777778 NA NA
## 30...40 0.7777778 0.7777778 0.7777778 NA
## 40... 0.7777778 0.7777778 1.0000000 0.7777778
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "a" "a"
## [1] "Tremellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 1.3902 0.3055
## 10
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 7.1667, df = 4, p-value = 0.1273
##
## 0...10 10...20 20...30 30...40
## 10...20 0.4000000 NA NA NA
## 20...30 0.4000000 0.7777778 NA NA
## 30...40 0.4000000 0.4000000 0.5714286 NA
## 40... 0.5714286 0.7777778 1.0000000 0.4
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "a" "a"
## [1] "Pezizomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 1.392 0.3049
## 10
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 8.1, df = 4, p-value = 0.08798
##
## 0...10 10...20 20...30 30...40
## 10...20 0.4000000 NA NA NA
## 20...30 0.2500000 0.5714286 NA NA
## 30...40 0.2500000 0.2500000 0.25 NA
## 40... 0.7777778 0.7777778 1.00 0.5714286
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "a" "a"
## [1] "Dothideomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 0.5163 0.7259
## 10
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 8.5667, df = 4, p-value = 0.07289
##
## 0...10 10...20 20...30 30...40
## 10...20 0.7777778 NA NA NA
## 20...30 0.4000000 0.5000000 NA NA
## 30...40 0.4000000 0.3333333 0.5 NA
## 40... 0.3333333 0.3333333 0.5 1
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "a" "a"
ps_RA_x <- aggregate_rare(ps_RA, level = "class", detection = 0, prevalence = 0)
ps_RA_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 68 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 68 taxa by 1 taxonomic ranks ]
# 68 taxa and 140 samples
#create data table
x_df <- psmelt(ps_RA_x)
This sentence based on looking at the class composition barplot:
“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Archaeosporomycetes in organic 30-80 cm, Geoglossomycetes in forest 10-20 cm, meadow 10-40 cm and organic 20-80 cm, Microbotryomycetes in conventional 40-80 cm, and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”
Let’s test them separately
x_df_nf <- subset(x_df, sample_type=="forest")
x_df_nf$compare <- NA
x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Pezizomycetes"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 2.6256 0.1291
## 13
# first one-way ANOVA
res.aov <- aov(Abundance ~ compare, data = df)
# Summary of the analysis
print(summary(res.aov))
## Df Sum Sq Mean Sq F value Pr(>F)
## compare 1 0.05219 0.05219 10.96 0.00564 **
## Residuals 13 0.06193 0.00476
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
This sentence remains to be tested:
“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Archaeosporomycetes in organic 30-80 cm, Geoglossomycetes in forest 10-20 cm, meadow 10-40 cm and organic 20-80 cm, Microbotryomycetes in conventional 40-80 cm, and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”
x_df_nf <- subset(x_df, sample_type=="organic")
x_df_nf$compare <- NA
x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Archaeosporomycetes"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 5.1753 0.02864 *
## 38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by compare
## Kruskal-Wallis chi-squared = 0, df = 1, p-value = 1
Not significant!
This sentence remains to be tested:
“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes in meadow 10-40 cm, Microbotryomycetes in conventional 40-80 cm, and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”
not sig. in:
x_df_nf <- subset(x_df, sample_type=="meadow")
x_df_nf$compare <- NA
x_df_nf$compare[x_df_nf$depth=="20...30"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="10...20"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Geoglossomycetes"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 21.441 4.181e-05 ***
## 38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by compare
## Kruskal-Wallis chi-squared = 14.099, df = 1, p-value = 0.0001734
Is sig. in meadow 10-40 cm
“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”
x_df_nf <- subset(x_df, sample_type=="conventional")
x_df_nf$compare <- NA
x_df_nf$compare[x_df_nf$depth=="40..."]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Microbotryomycetes"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 7.2692 0.009974 **
## 43
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by compare
## Kruskal-Wallis chi-squared = 4.6876, df = 1, p-value = 0.03038
Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.
x_df_nf <- subset(x_df, sample_type=="meadow")
x_df_nf$compare <- NA
x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="40..."]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Glomeromycetes"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 7.4061 0.009755 **
## 38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by compare
## Kruskal-Wallis chi-squared = 0.93404, df = 1, p-value = 0.3338
Not sig.
Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Orbiliomycetes in meadow 10-30 cm.
x_df_nf <- subset(x_df, sample_type=="meadow")
x_df_nf$compare <- NA
x_df_nf$compare[x_df_nf$depth=="10...20"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="20...30"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Orbiliomycetes"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 18.614 0.0001099 ***
## 38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by compare
## Kruskal-Wallis chi-squared = 19.128, df = 1, p-value = 1.222e-05
This sentence remains valid:
“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Orbiliomycetes in meadow 10-30 cm (Kruskal; p = 0.000).”
library(car)
library("rcompanion")
library("multcompView")
taxa <- "Tremellomycetes"
df <- subset(x_df, OTU==taxa)
df <- subset(df, depth=="0...10" | depth=="10...20")
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.0867 0.363
## 52
# perform the Kruskal test
k <- kruskal.test(Abundance ~ sample_type, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 10.71, df = 3, p-value = 0.0134
wilcox.res <- pairwise.wilcox.test(df$Abundance, df$sample_type,
p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)
## forest meadow organic
## meadow 0.10743261 NA NA
## organic 0.06401029 0.1074326 NA
## conventional 0.10743261 0.9051620 0.04755236
mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
## forest meadow organic conventional
## "ab" "ab" "a" "b"
x <- df %>% group_by(sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 4 × 3
## sample_type mean se
## <fct> <dbl> <dbl>
## 1 forest 0.129 0.0614
## 2 meadow 0.214 0.0263
## 3 organic 0.275 0.0214
## 4 conventional 0.196 0.0149
ps_RA_x <- aggregate_rare(ps_RA, level = "class", detection = 0, prevalence = 0)
ps_RA_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 68 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 68 taxa by 1 taxonomic ranks ]
# 68 taxa and 140 samples
#create data table
x_df <- psmelt(ps_RA_x)
df <- subset(x_df, sample_type!="forest")
df$year <- "2019"
x <- df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Leotiomycetes 0.291 0.0218
## 2 2019 Sordariomycetes 0.185 0.0131
## 3 2019 Dothideomycetes 0.137 0.0140
## 4 2019 Tremellomycetes 0.132 0.0111
## 5 2019 Mortierellomycetes 0.0793 0.00956
taxa <- y$OTU
for (i in taxa) {
df_x <- subset(df, OTU == i)
print(i)
x <- cor.test(df_x$Abundance, df_x$C_g_per_kg, method = "spearman", na.rm = TRUE)
print(x)
}
## [1] "Leotiomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$C_g_per_kg
## S = 481191, p-value = 1.685e-08
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.4783141
##
## [1] "Sordariomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$C_g_per_kg
## S = 289884, p-value = 0.2245
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1094195
##
## [1] "Dothideomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$C_g_per_kg
## S = 104195, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6798935
##
## [1] "Tremellomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$C_g_per_kg
## S = 95314, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.7071777
##
## [1] "Mortierellomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$C_g_per_kg
## S = 410271, p-value = 0.003354
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.2604336
AMF PERMANOVA will be done at genus level, because the PERMANOVA is used to support the AMF bubble plot in STEP 13, which is done at genus level.
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("pairwiseAdonis")
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_GM <- subset_taxa(ps, phylum=="Glomeromycota")
ps_GM
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 263 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 263 taxa by 7 taxonomic ranks ]
# 263 taxa and 140 samples
ps_GM <- aggregate_rare(ps_GM, level = "genus", detection = 0, prevalence = 0)
ps_GM
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 17 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 17 taxa by 1 taxonomic ranks ]
# 17 taxa and 140 samples
# Pick relative abundances (compositional) and sample metadata
ps_RA <- microbiome::transform(ps_GM, "compositional")
otu <- abundances(ps_RA)
meta <- meta(ps)
PERMANOVA cannot handle NAs, so I have to remove samples that do not have any AMF taxa from the analysis
# check how many AMF with dim (although I already know it is 17)
dim(otu)
## [1] 17 140
# If column sum adds up to zero, it means that that sample doesn't have any AMF. check how many zero values samples I have, and remove sample if all (17) are zero
x <- colSums(otu==0)==17
z <- which(x, arr.ind = FALSE, useNames = TRUE)
print(z)
## CG9.1_30to40 CG9.1_40to70 CPO5.1_40to70 CPO5.2_40to70 CR14.1_40to80
## 4 5 20 25 35
## M2_40to60 M3_30to40 M3_40to60 NG2A1_40to70 NG2B3_40to70
## 55 59 60 65 90
## OG10.2_40to70 OG10.3_30to40 OG10.3_40to70 OR13.1_30to40 OR13.1_40to80
## 110 114 115 129 130
Samples_toRemove <- c("CG9.1_30to40", "CG9.1_40to70", "CPO5.1_40to70", "CPO5.2_40to70", "CR14.1_40to80", "M2_40to60", "M3_30to40", "M3_40to60", "NG2A1_40to70", "NG2B3_40to70", "OG10.2_40to70", "OG10.3_30to40", "OG10.3_40to70", "OR13.1_30to40", "OR13.1_40to80")
ps_GM_pruned <- subset_samples(ps_RA, !(sampleID %in% Samples_toRemove))
ps_GM_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 17 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 17 taxa by 1 taxonomic ranks ]
# 17 taxa and 125 samples remained in the dataset
# lets make sure all missing taxa are removed
prev0 = apply(X = otu_table(ps_GM_pruned),
MARGIN = ifelse(taxa_are_rows(ps_GM_pruned), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_GM_pruned = prune_taxa((prev0 > 0), ps_GM_pruned)
ps_GM_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 17 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 17 taxa by 1 taxonomic ranks ]
# 17 taxa and 125 samples
# Pick relative abundances (compositional) and sample metadata
ps_RA <- ps_GM_pruned
otu <- abundances(ps_RA)
meta <- meta(ps_RA)
# note! the distance matrix is now at genus level!
ps_RA_bray <- phyloseq::distance(ps_RA, method = "bray")
GP.ord <- ordinate(ps_RA, "PCoA", "bray")
p1 = plot_ordination(ps_RA, GP.ord, type="samples", color="sample_type", shape = "depth")
print(p1)
# first with just soil type and strata option
a <- adonis2(formula = ps_RA_bray~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
print(a)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks: strata
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
## Df SumOfSqs R2 F Pr(>F)
## sample_type 3 4.029 0.11393 5.186 1e-04 ***
## Residual 121 31.336 0.88607
## Total 124 35.366 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# then with just depth and strata option
a <- adonis2(formula = ps_RA_bray~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
print(a)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks: strata
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
## Df SumOfSqs R2 F Pr(>F)
## depth 4 2.978 0.08421 2.7584 1e-04 ***
## Residual 120 32.388 0.91579
## Total 124 35.366 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The AMF communities at genus level differed more between management types (PERMANOVA; R2 = 0.114; p = 0.000) than between soil layers (PERMANOVA; R2 = 0.084; p = 0.000).
# For the full model it matters which "by" option we choose. When by="terms" will assess significance for each term sequentially from first to last, so that the order of terms matter. We will use this, because with sequential analysis we will get R2 values that sum up to 1, and will also get the significance and R2 values for each interaction term separately rather than for the interaction alone
# because sample type had larger R2 I will put it first in the model
# note interaction term was not significant!
final <- adonis2(formula = ps_RA_bray ~ sample_type + depth, data = meta, permutations = 9999, method = "bray", by = "terms")
print(final)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ sample_type + depth, data = meta, permutations = 9999, method = "bray", by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## sample_type 3 4.029 0.11393 5.5646 1e-04 ***
## depth 4 3.097 0.08758 3.2082 2e-04 ***
## Residual 117 28.239 0.79849
## Total 124 35.366 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_bray,factors=meta$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs forest 1 1.1125799 3.748075 0.06973412 0.009 0.054
## 2 conventional vs meadow 1 1.2654046 4.477131 0.05563234 0.003 0.018
## 3 conventional vs organic 1 0.6465733 2.776924 0.03664604 0.022 0.132
## 4 forest vs meadow 1 0.6459536 2.162313 0.04310632 0.075 0.450
## 5 forest vs organic 1 1.6460377 7.515467 0.14310959 0.001 0.006
## 6 meadow vs organic 1 2.5882231 11.141036 0.13563301 0.001 0.006
## sig
## 1
## 2 .
## 3
## 4
## 5 *
## 6 *
x <- as.data.frame(pair.mod)
print(x)
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs forest 1 1.1125799 3.748075 0.06973412 0.009 0.054
## 2 conventional vs meadow 1 1.2654046 4.477131 0.05563234 0.003 0.018
## 3 conventional vs organic 1 0.6465733 2.776924 0.03664604 0.022 0.132
## 4 forest vs meadow 1 0.6459536 2.162313 0.04310632 0.075 0.450
## 5 forest vs organic 1 1.6460377 7.515467 0.14310959 0.001 0.006
## 6 meadow vs organic 1 2.5882231 11.141036 0.13563301 0.001 0.006
## sig
## 1
## 2 .
## 3
## 4
## 5 *
## 6 *
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
#write.csv2(x, file = "AMF_Pairwise_PERMANOVA_by_MANAGEMENT.csv")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_bray,factors=meta$depth)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 0...10 vs 10...20 1 0.08290182 0.3780362 0.006952001 0.844 1.00
## 2 0...10 vs 20...30 1 0.34215045 1.3034979 0.023569900 0.236 1.00
## 3 0...10 vs 30...40 1 1.17951804 4.3871337 0.080664918 0.006 0.06
## 4 0...10 vs 40... 1 1.18589735 4.2227763 0.089422448 0.005 0.05
## 5 10...20 vs 20...30 1 0.30058651 1.2249127 0.022180438 0.279 1.00
## 6 10...20 vs 30...40 1 1.30971371 5.2305082 0.094703242 0.003 0.03
## 7 10...20 vs 40... 1 1.55697160 6.0029207 0.122501284 0.002 0.02
## 8 20...30 vs 30...40 1 0.53352935 1.7961256 0.034676834 0.134 1.00
## 9 20...30 vs 40... 1 0.85776912 2.7351625 0.059804369 0.031 0.31
## 10 30...40 vs 40... 1 0.30239257 0.9246894 0.023160842 0.453 1.00
## sig
## 1
## 2
## 3
## 4 .
## 5
## 6 .
## 7 .
## 8
## 9
## 10
meta$depth <- as.factor(meta$depth)
for (i in levels(meta$depth)) {
# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == i)
# lets make sure all missing taxa are removed
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
meta_subset <- meta(ps_RA_subset)
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
x <- as.data.frame(pairwise.adonis(ps_RA_subset_bray, factors=meta_subset$sample_type))
x$depth <- i
print(x)
}
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 0.3750125 1.841343 0.1093347 0.127 0.381
## 2 conventional vs organic 1 0.6560974 3.970385 0.2092939 0.013 0.039
## 3 meadow vs organic 1 0.3202003 1.587460 0.1018421 0.182 0.546
## sig depth
## 1 0...10
## 2 . 0...10
## 3 0...10
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 0.3639673 2.205031 0.12816200 0.085 0.255
## 2 conventional vs organic 1 0.1200414 1.070250 0.06659824 0.347 1.000
## 3 meadow vs organic 1 0.6154410 3.748793 0.21121396 0.032 0.096
## sig depth
## 1 10...20
## 2 10...20
## 3 10...20
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 0.9973650 4.0361192 0.21202427 0.001 0.003
## 2 conventional vs organic 1 0.1803777 0.7225702 0.04595751 0.647 1.000
## 3 meadow vs organic 1 1.1247420 7.0350643 0.33444463 0.002 0.006
## sig depth
## 1 * 20...30
## 2 20...30
## 3 * 20...30
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 0.57259160 1.8617351 0.11737273 0.137 0.411
## 2 conventional vs organic 1 0.06892252 0.2136963 0.01749645 0.912 1.000
## 3 meadow vs organic 1 0.49742310 1.9092142 0.13726255 0.110 0.330
## sig depth
## 1 30...40
## 2 30...40
## 3 30...40
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 0.3635791 0.8991967 0.09083532 0.503 1.000
## 2 conventional vs organic 1 0.2129225 0.6313847 0.07314988 0.620 1.000
## 3 meadow vs organic 1 0.6051674 1.8342283 0.16929939 0.175 0.525
## sig depth
## 1 40...
## 2 40...
## 3 40...
library(vegan)
library(goeveg)
library(metagMisc)
library(phyloseq)
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library(car)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# add sample_type_depth# add soil_type_depth
sample_data(ps)$soil_type_depth <- paste(sample_data(ps)$sample_type, "_", sample_data(ps)$depth)
meta <- meta(ps)
Note! first transform to RA and then filter AMF
# Relative abundance
ps_RA <- microbiome::transform(ps, 'compositional')
ps_RA
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 33 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# keep only Glomeromycota
ps2_std_G <- subset_taxa(ps_RA, phylum=="Glomeromycota")
ps2_std_G
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 263 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 33 sample variables ]
## tax_table() Taxonomy Table: [ 263 taxa by 7 taxonomic ranks ]
# 263 taxa and 140 samples
For the bubble plot, we want to get average values based on soil_type_depth
library("metagMisc")
ps2_std_G <- phyloseq_average(
ps2_std_G,
avg_type = "arithmetic",
group = "soil_type_depth",
drop_group_zero = FALSE,
verbose = FALSE,
progress = NULL
)
ps2_std_G
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 263 taxa and 20 samples ]
## tax_table() Taxonomy Table: [ 263 taxa by 7 taxonomic ranks ]
# aggregate
ps2_std_G <- aggregate_rare(ps2_std_G, level = 'genus', detection = 0, prevalence = 0, include.lowest = TRUE)
ps2_std_G
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 17 taxa and 20 samples ]
## tax_table() Taxonomy Table: [ 17 taxa by 1 taxonomic ranks ]
tax_table_G <- as.data.frame(as.matrix(tax_table(ps2_std_G)))
OTU_genus_table_G <- as.data.frame(as.matrix(otu_table(ps2_std_G)))
# make OTUs into columns
OTU_genus_table_G <- t(OTU_genus_table_G)
OTU_genus_table_G <- as.data.frame(OTU_genus_table_G)
# add total of phylum glomeracea
OTU_genus_table_G <- cbind(
OTU_genus_table_G,
total = rowSums(OTU_genus_table_G
))
# Change your data structure from a wide format to a long format. Put any variables that are not OTUs/genus, into the id parameter
#convert data frame from a wide format to a long format
pc <- tibble::rownames_to_column(OTU_genus_table_G, "Sample")
pcm = melt(pc, id = c("Sample"))
meta_x <- meta[!duplicated(meta$soil_type_depth),]
# Vector of column names to select
columns_to_keep <- c("depth", "sample_type", "soil_type_depth")
# Using base R to select columns
meta_x <- meta_x[, columns_to_keep]
rownames(meta_x) <- NULL
# add metadata
pcm <- left_join(pcm, meta_x, by=c("Sample"="soil_type_depth"))
# change the column name
colnames(pcm)[1] <- "soil_type_depth"
pcm$soil_type_depth <- as.factor(pcm$soil_type_depth)
# Change the order of samples
pcm$soil_type_depth <- factor(pcm$soil_type_depth,levels=c("forest _ 0...10", "forest _ 10...20", "forest _ 20...30", "forest _ 30...40", "forest _ 40...", "meadow _ 0...10", "meadow _ 10...20", "meadow _ 20...30", "meadow _ 30...40", "meadow _ 40...", "organic _ 0...10", "organic _ 10...20", "organic _ 20...30", "organic _ 30...40", "organic _ 40...", "conventional _ 0...10", "conventional _ 10...20", "conventional _ 20...30", "conventional _ 30...40", "conventional _ 40..."))
levels(pcm$soil_type_depth)
## [1] "forest _ 0...10" "forest _ 10...20" "forest _ 20...30"
## [4] "forest _ 30...40" "forest _ 40..." "meadow _ 0...10"
## [7] "meadow _ 10...20" "meadow _ 20...30" "meadow _ 30...40"
## [10] "meadow _ 40..." "organic _ 0...10" "organic _ 10...20"
## [13] "organic _ 20...30" "organic _ 30...40" "organic _ 40..."
## [16] "conventional _ 0...10" "conventional _ 10...20" "conventional _ 20...30"
## [19] "conventional _ 30...40" "conventional _ 40..."
# add the other taxonomic level annotations
TAX <- as.data.frame(as.matrix(tax_table(ps)))
row.names(TAX) <- NULL
TAX <- TAX[, 1:6]
TAX <- filter(TAX, phylum == "Glomeromycota")
TAX <- TAX[!duplicated(TAX$genus),]
pcm2 <- left_join(pcm, TAX, by = c("variable" = "genus"))
pcm2$variable[pcm2$variable == "Ambisporaceae_unclassified"] <- "Ambisporaceae (f)"
pcm2$variable[pcm2$variable == "Archaeospora"] <- "Archaeospora (g)"
pcm2$variable[pcm2$variable == "Archaeosporaceae_unclassified"] <- "Archaeosporaceae (f)"
pcm2$variable[pcm2$variable == "Archaeosporales_unclassified"] <- "Archaeosporales (o)"
pcm2$variable[pcm2$variable == "Diversispora"] <- "Diversispora (g)"
pcm2$variable[pcm2$variable == "Claroideoglomus"] <- "Claroideoglomus (g)"
pcm2$variable[pcm2$variable == "Entrophospora"] <- "Entrophospora (g)"
pcm2$variable[pcm2$variable == "Claroideoglomeraceae_unclassified"] <- "Claroideoglomeraceae (f)"
pcm2$variable[pcm2$variable == "Dominikia"] <- "Dominikia (g)"
pcm2$variable[pcm2$variable == "Funneliformis"] <- "Funneliformis (g)"
pcm2$variable[pcm2$variable == "Glomeraceae_unclassified"] <- "Glomeraceae (f)"
pcm2$variable[pcm2$variable == "Glomus"] <- "Glomus (g)"
pcm2$variable[pcm2$variable == "Microdominikia"] <- "Microdominikia (g)"
pcm2$variable[pcm2$variable == "Rhizophagus"] <- "Rhizophagus (g)"
pcm2$variable[pcm2$variable == "Glomeromycota_unclassified"] <- "Glomeromycota (p)"
pcm2$variable[pcm2$variable == "Paraglomus"] <- "Paraglomus (g)"
pcm2$variable[pcm2$variable == "Paraglomerales_unclassified"] <- "Paraglomerales (o)"
pcm2$variable[pcm2$variable == "total"] <- "total"
# add total to family
pcm2$family <- pcm2$family %>% replace_na('total')
pcm2$family[pcm2$family=="Ambisporaceae"] <- "Ambisporaceae (f)"
pcm2$family[pcm2$family=="Archaeosporaceae"] <- "Archaeosporaceae (f)"
pcm2$family[pcm2$family=="Archaeosporales_unclassified"] <- "Archaeosporales (o)"
pcm2$family[pcm2$family=="Diversisporaceae"] <- "Diversisporaceae (f)"
pcm2$family[pcm2$family=="Entrophosporaceae"] <- "Entrophosporaceae (f)"
pcm2$family[pcm2$family=="Claroideoglomeraceae"] <- "Claroideoglomeraceae (f)"
pcm2$family[pcm2$family=="Glomeraceae"] <- "Glomeraceae (f)"
pcm2$family[pcm2$family=="Glomeromycota_unclassified"] <- "Glomeromycota (p)"
pcm2$family[pcm2$family=="Paraglomeraceae"] <- "Paraglomeraceae (f)"
pcm2$family[pcm2$family=="Paraglomerales_unclassified"] <- "Paraglomerales (o)"
# make into factor
pcm2$family <- as.factor(pcm2$family)
levels(pcm2$family)
## [1] "Ambisporaceae (f)" "Archaeosporaceae (f)"
## [3] "Archaeosporales (o)" "Claroideoglomeraceae (f)"
## [5] "Diversisporaceae (f)" "Entrophosporaceae (f)"
## [7] "Glomeraceae (f)" "Glomeromycota (p)"
## [9] "Paraglomeraceae (f)" "Paraglomerales (o)"
## [11] "total"
# Change level family
pcm2$family <- factor(pcm2$family, levels = c("Ambisporaceae (f)", "Archaeosporaceae (f)", "Diversisporaceae (f)", "Glomeraceae (f)", "Claroideoglomeraceae (f)", "Entrophosporaceae (f)", "Paraglomeraceae (f)", "Glomeromycota (p)", "Archaeosporales (o)", "Paraglomerales (o)", "total"))
# make variable into factor
pcm2$variable <- as.factor(pcm2$variable)
# Change genus level order
pcm2$variable <- factor(pcm2$variable, levels = c("Archaeosporales (o)", "Ambisporaceae (f)", "Archaeosporaceae (f)", "Archaeospora (g)", "Diversispora (g)", "Glomeraceae (f)", "Dominikia (g)", "Funneliformis (g)", "Glomus (g)", "Microdominikia (g)", "Rhizophagus (g)", "Claroideoglomeraceae (f)", "Claroideoglomus (g)", "Entrophospora (g)", "Paraglomerales (o)", "Paraglomus (g)", "Glomeromycota (p)", "total"))
levels(pcm2$variable)
## [1] "Archaeosporales (o)" "Ambisporaceae (f)"
## [3] "Archaeosporaceae (f)" "Archaeospora (g)"
## [5] "Diversispora (g)" "Glomeraceae (f)"
## [7] "Dominikia (g)" "Funneliformis (g)"
## [9] "Glomus (g)" "Microdominikia (g)"
## [11] "Rhizophagus (g)" "Claroideoglomeraceae (f)"
## [13] "Claroideoglomus (g)" "Entrophospora (g)"
## [15] "Paraglomerales (o)" "Paraglomus (g)"
## [17] "Glomeromycota (p)" "total"
pcm2$AMF_guild <- NA
pcm2$AMF_guild[pcm2$family=="Ambisporaceae (f)"] <- "ancestral"
pcm2$AMF_guild[pcm2$family=="Archaeosporaceae (f)"] <- "ancestral"
pcm2$AMF_guild[pcm2$family=="Archaeosporales (o)"] <- "unknown"
pcm2$AMF_guild[pcm2$family=="Diversisporaceae (f)"] <- "edaphophilic"
pcm2$AMF_guild[pcm2$family=="Claroideoglomeraceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Entrophosporaceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Glomeraceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Glomeromycota (p)"] <- "unknown"
pcm2$AMF_guild[pcm2$family=="Paraglomeraceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Paraglomerales (o)"] <- "unknown"
pcm2$new_depth <- NA
pcm2$new_depth[pcm2$depth=="0...10"]<-"0-10"
pcm2$new_depth[pcm2$depth=="10...20"]<-"10-20"
pcm2$new_depth[pcm2$depth=="20...30"]<-"20-30"
pcm2$new_depth[pcm2$depth=="30...40"]<-"30-40"
pcm2$new_depth[pcm2$depth=="40..."]<-"40-80"
Currently, AMF are divided in five orders (Archaeosporales, Diversisporales, Glomerales, Entrophosporales and Paraglomerales)
# modifying the column of data frame
pcm2$order <- as.factor(pcm2$order)
levels(pcm2$order)
## [1] "Archaeosporales" "Diversisporales"
## [3] "Entrophosporales" "Glomerales"
## [5] "Glomeromycota_unclassified" "Paraglomerales"
# Change levels
pcm2$order <- factor(pcm2$order, levels = c("Archaeosporales", "Diversisporales", "Entrophosporales", "Glomerales", "Paraglomerales", "Glomeromycota_unclassified"), labels = c("Archaeosporales (o)", "Diversisporales (o)", "Entrophosporales (o)", "Glomerales (o)", "Paraglomerales (o)", "Glomeromycota (p)"))
# add order = Glomeromycota to total
pcm2$order <- pcm2$order %>% replace_na('Glomeromycota (p)')
levels(pcm2$order)
## [1] "Archaeosporales (o)" "Diversisporales (o)" "Entrophosporales (o)"
## [4] "Glomerales (o)" "Paraglomerales (o)" "Glomeromycota (p)"
For a bubble plot, you are using geom_point and scaling the size to your value (relative abundance) column.
I checked that: - thesmallest non-zero is 8.314653e-07 or 0.0000008314653 - and the biggest value is 1.164825e-01 or 0.1164825
So lets set the limits in the figure accordingly
# color for AMF_guild
MyPalette = c("red", "blue", "#ff028d", "black")
# where ("Ancestral" = "red", "Rhizophilic" = "#ff028d", "Edaphophilic" = "blue", "Unknown" = "black")
xx2 = ggplot(pcm2, aes(x = new_depth, y = variable)) +
geom_point(aes(size = value, fill = order, color = AMF_guild), shape = 21, alpha = 1, stroke = 0) +
scale_fill_manual(values=c("#99CC99", "#83adb5", "#692D6B", "#D094D2", "#cba69e", "#A9A9A9")) +
scale_size_continuous(limits = c(0.0000001, 0.2), range = c(3,30), breaks = c(0.0000001, 0.0001, 0.001, 0.01, 0.2)) +
labs( x= "soil layer (cm)", y = "", size = "Relative abundance", fill = "", color = "AMF guild") +
theme(legend.key=element_blank(),
axis.text.x = element_text(colour = "black", size = 14, angle = 45, vjust = 1, hjust = 1),
axis.title.x=element_text(colour = "black", size = 14,face="bold"),
axis.text.y = element_text(colour = "black", size = 14),
legend.text = element_text(size = 14, colour ="black"),
legend.title = element_text(size = 16, face = "bold"),
panel.background = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1.2)) + facet_wrap(vars(sample_type), nrow = 1, ncol = 4) +
scale_y_discrete(limits = rev(levels(pcm2$variable))) + guides(fill = guide_legend(order = 1, ncol = 2, override.aes = list(size = 8, shape = 21)), color = guide_legend(order = 2, ncol = 2, override.aes = list(shape = 15, size = 8, stroke = 2)))
#+ theme(legend.margin=margin(1,1,1,1), legend.box.spacing = unit(1.6, "pt"))
#+ theme(legend.title.align=0.5)#+ theme(legend.position="bottom")
f1 <- xx2 + scale_color_manual(values = (MyPalette)) + theme(strip.text = element_text(size = 16, color = "black"))
#+ guides(color = guide_legend(nrow = 2, override.aes = list(shape = 15, size = 2, stroke = 2))) #+ coord_flip() + guide_legend(ncol=2)
### change y axis label colors based on AMF guild
f2 <- f1 + theme(axis.text.y = element_text(color = c("black", "black", "#ff028d", "black", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "blue", "red", "red", "red", "black")))
print(f2)
Save with 1400 width and 550 height
I have previously tested the AMF genera, family, order and guild, but no other difference was found other than:
More Ambisporaceae in forest compared to other treatments (Wilcoxon).
So I will not include the tests here, They were done similarly as before e.g. for AMF relative abundance.
Simple spearman rank correlation with richness and env. variables are done WITHOUT forest because forest soil is so different environment compared to meadow, organic and conventional soils
library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_nf <- subset_samples(ps, sample_type!= "forest")
meta <- meta(ps_nf)
names(meta)
## [1] "sampleID" "plot" "sampling_position"
## [4] "depth" "depth_numerical" "vegetation"
## [7] "sample_type" "root_mgg" "pH_H2O"
## [10] "EC_uScm" "C_g_per_kg" "N_gkg"
## [13] "TP_gkg" "Alox_mmolkg" "Feox_mmolkg"
## [16] "oxides_mmolkg" "PH2O_mgkg" "Porg_mgkg"
## [19] "DOC_mgkg" "Pinorg_mgkg" "C_per_N"
## [22] "observed" "chao1" "shannon"
## [25] "observed_sng" "chao1_sng" "shannon_sng"
## [28] "log_root" "saprotroph_richness" "symbiotroph_richness"
## [31] "pathotroph_richness" "AMF_richness"
Soil properties to test against:
C_per_N pH_H2O C_g_per_kg Feox_mmolkg DOC_mgkg N_gkg depth_numerical Porg_mgkg log_root TP_gkg Pinorg_mgkg Alox_mmolkg PH2O_mgkg
env <- c("C_per_N", "pH_H2O", "C_g_per_kg", "Feox_mmolkg", "DOC_mgkg", "N_gkg", "depth_numerical", "Porg_mgkg", "log_root", "TP_gkg", "Pinorg_mgkg", "Alox_mmolkg", "PH2O_mgkg")
for (i in env) {
# Filter out rows with NA values in the columns of interest
valid_data <- meta[!is.na(meta$observed) & !is.na(meta[[i]]), ]
# Perform Spearman correlation test
x <- cor.test(valid_data$observed, valid_data[[i]], method = "spearman")
# Print the result
print(paste("Correlation test for:", i))
print(x)
}
## [1] "Correlation test for: C_per_N"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 122420, p-value = 7.695e-15
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6239005
##
## [1] "Correlation test for: pH_H2O"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 543813, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.6707008
##
## [1] "Correlation test for: C_g_per_kg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 98808, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6964415
##
## [1] "Correlation test for: Feox_mmolkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 145648, p-value = 2.38e-11
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5525416
##
## [1] "Correlation test for: DOC_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 107044, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6711408
##
## [1] "Correlation test for: N_gkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 103654, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6815556
##
## [1] "Correlation test for: depth_numerical"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 568043, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.7451402
##
## [1] "Correlation test for: Porg_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 108419, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6587925
##
## [1] "Correlation test for: log_root"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 147129, p-value = 3.731e-11
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5479921
##
## [1] "Correlation test for: TP_gkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 134591, p-value = 6.626e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5865102
##
## [1] "Correlation test for: Pinorg_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 361273, p-value = 0.2224
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1099019
##
## [1] "Correlation test for: Alox_mmolkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 130503, p-value = 1.582e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.59907
##
## [1] "Correlation test for: PH2O_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 258057, p-value = 0.03667
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1878602
env <- c("C_per_N", "pH_H2O", "C_g_per_kg", "Feox_mmolkg", "DOC_mgkg", "N_gkg", "depth_numerical", "Porg_mgkg", "log_root", "TP_gkg", "Pinorg_mgkg", "Alox_mmolkg", "PH2O_mgkg")
for (i in env) {
# Filter out rows with NA values in the columns of interest
valid_data <- meta[!is.na(meta$AMF_richness) & !is.na(meta[[i]]), ]
# Perform Spearman correlation test
x <- cor.test(valid_data$AMF_richness, valid_data[[i]], method = "spearman")
# Print the result
print(paste("Correlation test for:", i))
print(x)
}
## [1] "Correlation test for: C_per_N"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 142617, p-value = 9.284e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5618529
##
## [1] "Correlation test for: pH_H2O"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 512461, p-value = 2.496e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.5743796
##
## [1] "Correlation test for: C_g_per_kg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 159586, p-value = 1.27e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5097203
##
## [1] "Correlation test for: Feox_mmolkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 124010, p-value = 1.425e-14
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6190164
##
## [1] "Correlation test for: DOC_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 157639, p-value = 7.53e-10
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5157014
##
## [1] "Correlation test for: N_gkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 176552, p-value = 8.089e-08
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4575968
##
## [1] "Correlation test for: depth_numerical"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 453384, p-value = 5.841e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.3928852
##
## [1] "Correlation test for: Porg_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 204283, p-value = 4.676e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3570961
##
## [1] "Correlation test for: log_root"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 189803, p-value = 1.324e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4168873
##
## [1] "Correlation test for: TP_gkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 252601, p-value = 0.01205
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2239608
##
## [1] "Correlation test for: Pinorg_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 384409, p-value = 0.0434
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1809802
##
## [1] "Correlation test for: Alox_mmolkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 133398, p-value = 4.39e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5901751
##
## [1] "Correlation test for: PH2O_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 349401, p-value = 0.271
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.09961067
ps_nf_RA <- transform(ps_nf, "compositional")
ps_nf_RA
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
gm <- aggregate_rare(ps_nf_RA, level = "phylum", detection = 0, prevalence = 0 )
gm
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 14 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 14 taxa by 1 taxonomic ranks ]
df <- psmelt(gm)
df <- subset(df, OTU == "Glomeromycota")
env <- c("C_per_N", "pH_H2O", "C_g_per_kg", "Feox_mmolkg", "DOC_mgkg", "N_gkg", "depth_numerical", "Porg_mgkg", "log_root", "TP_gkg", "Pinorg_mgkg", "Alox_mmolkg", "PH2O_mgkg")
for (i in env) {
# Perform Spearman correlation test
x <- cor.test(df$Abundance, df[[i]], method = "spearman", na.rm = TRUE)
# Print the result
print(paste("Correlation test for:", i))
print(x)
}
## [1] "Correlation test for: C_per_N"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 338926, p-value = 0.6479
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.04124707
##
## [1] "Correlation test for: pH_H2O"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 305311, p-value = 0.492
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.06202587
##
## [1] "Correlation test for: C_g_per_kg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 369419, p-value = 0.1336
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1349274
##
## [1] "Correlation test for: Feox_mmolkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 320234, p-value = 0.8579
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.01617915
##
## [1] "Correlation test for: DOC_mgkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 377055, p-value = 0.0777
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.158386
##
## [1] "Correlation test for: N_gkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 384219, p-value = 0.0441
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1803965
##
## [1] "Correlation test for: depth_numerical"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 257422, p-value = 0.01924
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2091478
##
## [1] "Correlation test for: Porg_mgkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 394534, p-value = 0.006855
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.2416477
##
## [1] "Correlation test for: log_root"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 368781, p-value = 0.1393
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1329676
##
## [1] "Correlation test for: TP_gkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 430932, p-value = 0.0002288
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.323907
##
## [1] "Correlation test for: Pinorg_mgkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 348926, p-value = 0.4251
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.07197018
##
## [1] "Correlation test for: Alox_mmolkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 313002, p-value = 0.6707
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.03839783
##
## [1] "Correlation test for: PH2O_mgkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 443246, p-value = 5.642e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.3949508
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("tibble")
plant.div <- read.csv2("C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile\\Plant_diversity_Yoni_2011_2012.csv", dec = ".")
plant.div$Year <- as.factor(plant.div$Year)
Let’s only include organic and conventional in the non-crop coverage analysis as meadow does not have a crop
## first lets test if variances are equal so we know if use ANOVA or Kruskal test:
library(car)
df <- subset(plant.div, Year==2011)
df <- subset(df, Treatment!="meadow")
# Using leveneTest()
result = leveneTest(non_crop_cover ~ Treatment, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.6401 0.4542
## 6
# high p-value means that variances are similar and that we can do ANOVA
# Shapiro-Wilk’s method is widely recommended for normality test and it provides better power than K-S. It is based on the correlation between the data and the corresponding normal scores.
shapiro.test(df$non_crop_cover)
##
## Shapiro-Wilk normality test
##
## data: df$non_crop_cover
## W = 0.98115, p-value = 0.9684
# high p value means that data is normally distributed
res.aov <- aov(non_crop_cover ~ Treatment, data = df)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 1 3793 3793 14.91 0.00834 **
## Residuals 6 1526 254
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# As the ANOVA test is significant, we can compute Tukey HSD
TukeyHSD(res.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = non_crop_cover ~ Treatment, data = df)
##
## $Treatment
## diff lwr upr p adj
## organic-conventional 43.55 15.95684 71.14316 0.0083435
Non-crop vegetation cover in 2011 is significantly different between conventional and organic
df <- subset(plant.div, Year==2012)
df <- subset(df, Treatment!="meadow")
# Using leveneTest()
result = leveneTest(non_crop_cover ~ Treatment, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 4.76 0.08093 .
## 5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# high p-value means that variances are similar and that we can do ANOVA
# Shapiro-Wilk’s method is widely recommended for normality test and it provides better power than K-S. It is based on the correlation between the data and the corresponding normal scores.
shapiro.test(df$non_crop_cover)
##
## Shapiro-Wilk normality test
##
## data: df$non_crop_cover
## W = 0.74543, p-value = 0.01137
# low p value means that data is not normally distributed, but let's still do ANOVA
# analysis of variance takes the same form as the usual models you'd see in R
# response ~ dependent, data = environmental grouping
res.aov <- aov(non_crop_cover ~ Treatment, data = df)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 1 2885 2885.2 8.19 0.0353 *
## Residuals 5 1761 352.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# As the ANOVA test is significant, we can compute Tukey HSD
TukeyHSD(res.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = non_crop_cover ~ Treatment, data = df)
##
## $Treatment
## diff lwr upr p adj
## organic-conventional 41.025 4.175863 77.87414 0.0353283
Non-crop vegetation cover in 2012 is significantly different between conventional and organic
pal <- c(meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
df <- subset(plant.div, Treatment!="meadow")
df <- droplevels(df)
df$Treatment <- factor(df$Treatment, levels = c("meadow", "organic", "conventional"))
plot_non_crop_cover <- ggplot(df, aes(x = Treatment, y = non_crop_cover, fill = Treatment)) + facet_grid (cols = vars(Year)) +
geom_boxplot() +
scale_fill_manual(values = pal) +
theme(legend.position = "none",
plot.background = element_rect("white"),
panel.background = element_rect("white"),
panel.grid = element_line("grey90"),
axis.line = element_line("gray25"),
axis.text.y = element_text(size = 12, color = "gray25"),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title = element_text(color = "gray25"),
legend.text = element_text(size = 12)) +
labs(x = NULL,
y = "Non-crop vegetation cover")
plot_non_crop_cover
# subset the Year
year11 <- plant.div[plant.div$Year == "2011",]
year11 <- subset(year11, Treatment!="meadow")
year11 <- droplevels(year11)
year11$Treatment <- factor(year11$Treatment, levels = c("meadow", "organic", "conventional"))
# Plot subset
plot_non_crop_cover11 <- ggplot(year11, aes(x = Treatment, y = non_crop_cover, fill = Treatment)) + facet_grid (cols = vars(Year)) +
geom_boxplot() +
scale_fill_manual(values = pal) +
theme(legend.position = "none",
plot.background = element_rect("white"),
panel.background = element_rect("white"),
panel.grid = element_line("grey90"),
axis.line = element_line("gray25"),
axis.text.y = element_text(size = 12, color = "gray25"),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title = element_text(color = "gray25"),
legend.text = element_text(size = 12)) +
labs(x = NULL,
y = "Non-crop vegetation cover")
# Annotate this plot
plot_non_crop_cover11 <- plot_non_crop_cover11 +
geom_text(x = 1.5, y = 94,
label = "**",
colour = "black") +
geom_segment(x = 0.8, xend = 0.8,
y = 92, yend = 93,
colour = "black") +
geom_segment(x = 2.2, xend = 2.2,
y = 92, yend = 93,
colour = "black") +
geom_segment(x = 0.8, xend = 2.2,
y = 93, yend = 93,
colour = "black") + ylim(0, 98)
plot_non_crop_cover11
# subset the Year
year12 <- plant.div[plant.div$Year == "2012",]
year12 <- subset(year12, Treatment!="meadow")
year12 <- droplevels(year12)
# Plot subset
plot_non_crop_cover12 <- ggplot(year12, aes(x = Treatment, y = non_crop_cover, fill = Treatment)) + facet_grid (cols = vars(Year)) +
geom_boxplot() +
scale_fill_manual(values = pal) +
theme(legend.position = "none",
plot.background = element_rect("white"),
panel.background = element_rect("white"),
panel.grid = element_line("grey90"),
axis.line = element_line("gray25"),
axis.text.y = element_text(size = 12, color = "gray25"),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title = element_text(color = "gray25"),
legend.text = element_text(size = 12)) +
labs(x = NULL,
y = "Non-crop vegetation cover")
# Annotate this plot
plot_non_crop_cover12 <- plot_non_crop_cover12 +
geom_text(x = 1.5, y = 94,
label = "*",
colour = "black") +
geom_segment(x = 0.8, xend = 0.8,
y = 92, yend = 93,
colour = "black") +
geom_segment(x = 2.2, xend = 2.2,
y = 92, yend = 93,
colour = "black") +
geom_segment(x = 0.8, xend = 2.2,
y = 93, yend = 93,
colour = "black") + ylim(0, 98)
plot_non_crop_cover12
library(ggpubr)
plot_non_crop_cover <- ggarrange(plot_non_crop_cover11, plot_non_crop_cover12 + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"),
ncol = 2, nrow = 1, widths = c(1,0.8))
plot_non_crop_cover
df <- subset(plant.div, Year==2011)
# Using leveneTest()
result = leveneTest(richness ~ Treatment, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 4.6742 0.05137 .
## 7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# high p-value means that variances are similar and that we can do ANOVA
# Shapiro-Wilk’s method is widely recommended for normality test and it provides better power than K-S. It is based on the correlation between the data and the corresponding normal scores.
shapiro.test(df$richness)
##
## Shapiro-Wilk normality test
##
## data: df$richness
## W = 0.88504, p-value = 0.149
# high p value means that data is normally distributed
# analysis of variance takes the same form as the usual models you'd see in R
# response ~ dependent, data = environmental grouping
res.aov <- aov(richness ~ Treatment, data = df)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 107 53.50 2.003 0.205
## Residuals 7 187 26.71
# As the ANOVA test is significant, we can compute Tukey HSD
TukeyHSD(res.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = richness ~ Treatment, data = df)
##
## $Treatment
## diff lwr upr p adj
## meadow-conventional -0.5 -13.682456 12.68246 0.9931512
## organic-conventional 6.5 -4.263430 17.26343 0.2444743
## organic-meadow 7.0 -6.182456 20.18246 0.3210863
Plant richness in 2011 does not significantly differ between any treatments
df <- subset(plant.div, Year==2012)
# Using leveneTest()
result = leveneTest(richness ~ Treatment, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 2.2103 0.1909
## 6
# high p-value means that variances are similar and that we can do ANOVA
# Shapiro-Wilk’s method is widely recommended for normality test and it provides better power than K-S. It is based on the correlation between the data and the corresponding normal scores.
shapiro.test(df$richness)
##
## Shapiro-Wilk normality test
##
## data: df$richness
## W = 0.88578, p-value = 0.1805
# high p value means that data is normally distributed
# analysis of variance takes the same form as the usual models you'd see in R
# response ~ dependent, data = environmental grouping
res.aov <- aov(richness ~ Treatment, data = df)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 192.97 96.49 7.067 0.0265 *
## Residuals 6 81.92 13.65
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# As the ANOVA test is significant, we can compute Tukey HSD
TukeyHSD(res.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = richness ~ Treatment, data = df)
##
## $Treatment
## diff lwr upr p adj
## meadow-conventional 3.750000 -6.068278 13.56828 0.5100073
## organic-conventional 10.583333 1.924426 19.24224 0.0221986
## organic-meadow 6.833333 -3.516041 17.18271 0.1867477
Plant richness in 2012 does significantly differs between conventional and organic only
pal <- c(meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
plant.div$Treatment <- factor(plant.div$Treatment, levels = c("meadow", "organic", "conventional"))
plot_rich <- ggplot(plant.div, aes(x = Treatment, y = richness, fill = Treatment)) + facet_grid (cols = vars(Year)) +
geom_boxplot() +
scale_fill_manual(values = pal) + theme(legend.position = "none",
plot.background = element_rect("white"),
panel.background = element_rect("white"),
panel.grid = element_line("grey90"),
axis.line = element_line("gray25"),
axis.text.y = element_text(size = 12, color = "gray25"),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title = element_text(color = "gray25"),
legend.text = element_text(size = 12)) +
labs(x = NULL,
y = "Plant richness")
plot_rich
# subset the Year
year11 <- plant.div[plant.div$Year == "2011",]
# Plot subset
plot_rich11 <- ggplot(year11, aes(x = Treatment, y = richness, fill = Treatment)) + facet_grid (cols = vars(Year)) +
geom_boxplot() +
scale_fill_manual(values = pal) +
theme(legend.position = "none",
plot.background = element_rect("white"),
panel.background = element_rect("white"),
panel.grid = element_line("grey90"),
axis.line = element_line("gray25"),
axis.text.y = element_text(size = 12, color = "gray25"),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title = element_text(color = "gray25"),
legend.text = element_text(size = 12)) +
labs(x = NULL,
y = "Plant richness")
# Annotate this plot
plot_rich11 <- plot_rich11 +
geom_text(x = 1, y = 34,
label = "(ns.)") + ylim(10, 35)
plot_rich11
# subset the Year
year12 <- plant.div[plant.div$Year == "2012",]
# Plot subset
plot_rich12 <- ggplot(year12, aes(x = Treatment, y = richness, fill = Treatment)) + facet_grid (cols = vars(Year)) +
geom_boxplot() +
scale_fill_manual(values = pal) +
theme(legend.position = "none",
plot.background = element_rect("white"),
panel.background = element_rect("white"),
panel.grid = element_line("grey90"),
axis.line = element_line("gray25"),
axis.text.y = element_text(size = 12, color = "gray25"),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title = element_text(color = "gray25"),
legend.text = element_text(size = 12)) +
labs(x = NULL,
y = "Plant richness")
# Annotate this plot
plot_rich12 <- plot_rich12 +
geom_text(x = 2.5, y = 34,
label = "*",
colour = "black") +
geom_segment(x = 1.8, xend = 1.8,
y = 32, yend = 33,
colour = "black") +
geom_segment(x = 3.2, xend = 3.2,
y = 32, yend = 33,
colour = "black") +
geom_segment(x = 1.8, xend = 3.2,
y = 33, yend = 33,
colour = "black") + ylim(10, 35)
plot_rich12
library(ggpubr)
plot_rich <- ggarrange(plot_rich11, plot_rich12 + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"),
ncol = 2, nrow = 1, widths = c(1,0.75))
plot_rich
df <- subset(plant.div, Year==2011)
# Using leveneTest()
result = leveneTest(Shannon ~ Treatment, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.2186 0.8089
## 7
# high p-value means that variances are similar and that we can do ANOVA
# Shapiro-Wilk’s method is widely recommended for normality test and it provides better power than K-S. It is based on the correlation between the data and the corresponding normal scores.
shapiro.test(df$Shannon)
##
## Shapiro-Wilk normality test
##
## data: df$Shannon
## W = 0.97096, p-value = 0.8996
# high p value means that data is normally distributed
# analysis of variance takes the same form as the usual models you'd see in R
# response ~ dependent, data = environmental grouping
res.aov <- aov(Shannon ~ Treatment, data = df)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 0.2332 0.11658 1.279 0.336
## Residuals 7 0.6381 0.09115
# As the ANOVA test is significant, we can compute Tukey HSD
TukeyHSD(res.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon ~ Treatment, data = df)
##
## $Treatment
## diff lwr upr p adj
## organic-meadow 0.415 -0.3550215 1.185022 0.3119085
## conventional-meadow 0.315 -0.4550215 1.085022 0.4874226
## conventional-organic -0.100 -0.7287200 0.528720 0.8879312
Shannon in 2011 does not significantly differ between any treatments
df <- subset(plant.div, Year==2012)
# Using leveneTest()
result = leveneTest(Shannon ~ Treatment, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.4157 0.6775
## 6
# high p-value means that variances are similar and that we can do ANOVA
# Shapiro-Wilk’s method is widely recommended for normality test and it provides better power than K-S. It is based on the correlation between the data and the corresponding normal scores.
shapiro.test(df$Shannon)
##
## Shapiro-Wilk normality test
##
## data: df$Shannon
## W = 0.96756, p-value = 0.8731
# high p value means that data is normally distributed
# analysis of variance takes the same form as the usual models you'd see in R
# response ~ dependent, data = environmental grouping
res.aov <- aov(Shannon ~ Treatment, data = df)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 0.3887 0.1943 2.598 0.154
## Residuals 6 0.4488 0.0748
# As the ANOVA test is significant, we can compute Tukey HSD
TukeyHSD(res.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon ~ Treatment, data = df)
##
## $Treatment
## diff lwr upr p adj
## organic-meadow 0.52833333 -0.2377047 1.2943714 0.1665450
## conventional-meadow 0.47250000 -0.2542275 1.1992275 0.1941950
## conventional-organic -0.05583333 -0.6967468 0.5850801 0.9616295
Shannon in 2012 does not significantly differ between any treatments
pal <- c(meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
plant.div$Treatment <- factor(plant.div$Treatment, levels = c("meadow", "organic", "conventional"))
plot_Shannon <- ggplot(plant.div, aes(x = Treatment, y = Shannon, fill = Treatment)) + facet_grid (cols = vars(Year)) +
geom_boxplot() +
scale_fill_manual(values = pal) +
theme(legend.position = "none",
plot.background = element_rect("white"),
panel.background = element_rect("white"),
panel.grid = element_line("grey90"),
axis.line = element_line("gray25"),
axis.text.y = element_text(size = 12, color = "gray25"),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title = element_text(color = "gray25"),
legend.text = element_text(size = 12)) +
labs(x = NULL,
y = "Plant Shannon diversity")
plot_Shannon
# subset the Year
year11 <- plant.div[plant.div$Year == "2011",]
# Plot subset
plot_Shannon11 <- ggplot(year11, aes(x = Treatment, y = Shannon, fill = Treatment)) + facet_grid (cols = vars(Year)) +
geom_boxplot() +
scale_fill_manual(values = pal) +
theme(legend.position = "none",
plot.background = element_rect("white"),
panel.background = element_rect("white"),
panel.grid = element_line("grey90"),
axis.line = element_line("gray25"),
axis.text.y = element_text(size = 12, color = "gray25"),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title = element_text(color = "gray25"),
legend.text = element_text(size = 12)) +
labs(x = NULL,
y = "Plant Shannon diversity")
# Annotate this plot
plot_Shannon11 <- plot_Shannon11 +
geom_text(x = 1, y = 2.9,
label = "(ns.)") + ylim(1.5, 3)
plot_Shannon11
# subset the Year
year12 <- plant.div[plant.div$Year == "2012",]
# Plot subset
plot_Shannon12 <- ggplot(year12, aes(x = Treatment, y = Shannon, fill = Treatment)) + facet_grid (cols = vars(Year)) +
geom_boxplot() +
scale_fill_manual(values = pal) +
theme(legend.position = "right",
legend.title = element_blank(),
plot.background = element_rect("white"),
panel.background = element_rect("white"),
panel.grid = element_line("grey90"),
axis.line = element_line("gray25"),
axis.text.y = element_text(size = 12, color = "gray25"),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title = element_text(color = "gray25"),
legend.text = element_text(size = 12)) +
labs(x = NULL,
y = "Plant Shannon diversity")
# Annotate this plot
plot_Shannon12 <- plot_Shannon12 +
geom_text(x = 1, y = 2.9,
label = "(ns.)") + ylim(1.5, 3)
plot_Shannon12
library(ggpubr)
plot_Shannon <- ggarrange(plot_Shannon11, plot_Shannon12 + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"),
ncol = 2, nrow = 1, widths = c(1,1.63))
plot_Shannon
library(ggpubr)
fig <- ggarrange(plot_non_crop_cover, plot_rich, plot_Shannon,
ncol = 3, nrow = 1, widths = c(3,4,6))
fig
fig <- ggarrange(plot_rich, plot_Shannon,
ncol = 3, nrow = 1, widths = c(2, 3.1))
fig
library('phyloseq')
library("cowplot")
library("ggplot2")
library("vegan")
library("microbiome")
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load("ps_FINAL")#ps
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library("dplyr")
library("multcomp")
library(car)
library("rcompanion")
library("multcompView")
meta$depth <- as.factor(meta$depth)
soil.properties <- as.factor(c("C_g_per_kg", "N_gkg", "C_per_N", "DOC_mgkg", "TP_gkg",
"Porg_mgkg", "Pinorg_mgkg", "PH2O_mgkg", "pH_H2O",
"Feox_mmolkg", "Alox_mmolkg", "log_root"))
# Create a data frame with 0 rows and 6 columns
df_test <- data.frame(matrix(ncol = 6))
colnames(df_test) <- c('sample_type', 'property', 'depth', 'LevenesP', 'ANOVAsOrKrusalsP', 'mean_se_cld')
# Loop over depth and soil properties
for (i in levels(meta$depth)) {
for (j in levels(soil.properties)) {
df <- filter(meta, depth == i & sample_type != "forest")
df <- df[, c("sample_type", j)]
df <- droplevels(df)
# Perform Levene's test
levene <- leveneTest(df[[j]] ~ sample_type, df)
if (levene[1,3] > 0.05) {
# Perform ANOVA
res.aov <- aov(df[[j]] ~ sample_type, data = df)
res.aov2 <- summary(res.aov)
res.aov2 <- res.aov2[[1]]
# Create a result data frame for ANOVA
df.res.aov <- data.frame(sample_type = levels(df$sample_type),
LevenesP = sprintf("%.3f", levene[1,3]),
ANOVAsOrKrusalsP = sprintf("%.3f", res.aov2[1,5]))
# Tukey's HSD and compact letter display
tukey <- glht(res.aov, linfct = mcp(sample_type = "Tukey"))
cld <- cld(tukey)
cld <- cld[["mcletters"]][["Letters"]]
cld <- as.data.frame(cld)
cld$sample_type <- rownames(cld)
rownames(cld) <- NULL
} else {
# Perform Kruskal-Wallis test
res.aov2 <- kruskal.test(df[[j]] ~ sample_type, data = df)
df.res.aov <- data.frame(sample_type = levels(df$sample_type),
LevenesP = sprintf("%.3f", levene[1, 3]),
ANOVAsOrKrusalsP = sprintf("%.3f", res.aov2$p.value))
# Wilcoxon test and compact letter display
wilcox.res <- pairwise.wilcox.test(df[[j]], df$sample_type, p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
cld <- fullPTable(wilcox.res)
cld[is.na(cld)] <- 0
cld <- multcompLetters(cld)
cld <- cld[["Letters"]]
cld <- as.data.frame(cld)
cld$sample_type <- rownames(cld)
rownames(cld) <- NULL
}
# Calculate mean and SE grouped by sample_type
x <- df %>%
group_by(sample_type) %>%
summarise(mean = sprintf("%.3f", mean(.data[[j]], na.rm = TRUE)),
se = sprintf("%.3f", sd(.data[[j]], na.rm = TRUE) / sqrt(n())))
x$property <- j
x$depth <- i
# Merge results and append to df_test
output <- merge(x, df.res.aov, by = "sample_type")
output <- merge(output, cld, by = "sample_type")
output$mean_se_cld <- paste(output$mean, "±", output$se, output$cld, sep = "")
output <- output[, -c(2,3,8)]
df_test <- rbind(df_test, output)
}
}
df_test
## sample_type property depth LevenesP ANOVAsOrKrusalsP mean_se_cld
## 1 <NA> <NA> <NA> <NA> <NA> <NA>
## 2 conventional Alox_mmolkg 0...10 0.078 0.000 81.770±2.977b
## 3 meadow Alox_mmolkg 0...10 0.078 0.000 120.218±9.732a
## 4 organic Alox_mmolkg 0...10 0.078 0.000 76.559±2.766b
## 5 conventional C_g_per_kg 0...10 0.016 0.000 32.765±0.910c
## 6 meadow C_g_per_kg 0...10 0.016 0.000 70.317±2.595a
## 7 organic C_g_per_kg 0...10 0.016 0.000 35.276±0.678b
## 8 conventional C_per_N 0...10 0.427 0.000 11.588±0.126b
## 9 meadow C_per_N 0...10 0.427 0.000 12.962±0.174a
## 10 organic C_per_N 0...10 0.427 0.000 11.592±0.253b
## 11 conventional DOC_mgkg 0...10 0.367 0.000 325.756±24.568b
## 12 meadow DOC_mgkg 0...10 0.367 0.000 849.411±40.715a
## 13 organic DOC_mgkg 0...10 0.367 0.000 340.274±31.792b
## 14 conventional Feox_mmolkg 0...10 0.943 0.002 97.212±3.778b
## 15 meadow Feox_mmolkg 0...10 0.943 0.002 122.636±4.853a
## 16 organic Feox_mmolkg 0...10 0.943 0.002 101.175±5.494b
## 17 conventional log_root 0...10 0.453 0.000 -0.016±0.123b
## 18 meadow log_root 0...10 0.453 0.000 0.934±0.190a
## 19 organic log_root 0...10 0.453 0.000 0.145±0.120b
## 20 conventional N_gkg 0...10 0.017 0.000 2.828±0.076c
## 21 meadow N_gkg 0...10 0.017 0.000 5.425±0.191a
## 22 organic N_gkg 0...10 0.017 0.000 3.047±0.052b
## 23 conventional pH_H2O 0...10 0.219 0.013 5.841±0.043ab
## 24 meadow pH_H2O 0...10 0.219 0.013 5.692±0.110a
## 25 organic pH_H2O 0...10 0.219 0.013 6.058±0.074b
## 26 conventional PH2O_mgkg 0...10 0.057 0.003 0.388±0.053a
## 27 meadow PH2O_mgkg 0...10 0.057 0.003 0.453±0.076a
## 28 organic PH2O_mgkg 0...10 0.057 0.003 0.156±0.027b
## 29 conventional Pinorg_mgkg 0...10 0.885 0.091 566.024±49.491a
## 30 meadow Pinorg_mgkg 0...10 0.885 0.091 406.421±48.748a
## 31 organic Pinorg_mgkg 0...10 0.885 0.091 471.903±50.249a
## 32 conventional Porg_mgkg 0...10 0.273 0.002 475.803±33.557b
## 33 meadow Porg_mgkg 0...10 0.273 0.002 639.043±42.358a
## 34 organic Porg_mgkg 0...10 0.273 0.002 456.491±24.680b
## 35 conventional TP_gkg 0...10 0.964 0.102 1.042±0.041a
## 36 meadow TP_gkg 0...10 0.964 0.102 1.045±0.040a
## 37 organic TP_gkg 0...10 0.964 0.102 0.928±0.043a
## 38 conventional Alox_mmolkg 10...20 0.158 0.000 80.116±3.141b
## 39 meadow Alox_mmolkg 10...20 0.158 0.000 131.117±8.026a
## 40 organic Alox_mmolkg 10...20 0.158 0.000 75.829±3.234b
## 41 conventional C_g_per_kg 10...20 0.136 0.000 32.043±1.281b
## 42 meadow C_g_per_kg 10...20 0.136 0.000 46.864±1.829a
## 43 organic C_g_per_kg 10...20 0.136 0.000 34.764±0.705b
## 44 conventional C_per_N 10...20 0.198 0.042 11.614±0.129ab
## 45 meadow C_per_N 10...20 0.198 0.042 12.445±0.393a
## 46 organic C_per_N 10...20 0.198 0.042 11.437±0.273b
## 47 conventional DOC_mgkg 10...20 0.013 0.001 284.888±10.603b
## 48 meadow DOC_mgkg 10...20 0.013 0.001 495.767±37.094a
## 49 organic DOC_mgkg 10...20 0.013 0.001 327.089±21.578b
## 50 conventional Feox_mmolkg 10...20 0.812 0.000 97.640±4.588b
## 51 meadow Feox_mmolkg 10...20 0.812 0.000 140.049±4.099a
## 52 organic Feox_mmolkg 10...20 0.812 0.000 102.482±5.157b
## 53 conventional log_root 10...20 0.460 0.002 -0.709±0.191b
## 54 meadow log_root 10...20 0.460 0.002 0.336±0.129a
## 55 organic log_root 10...20 0.460 0.002 -0.267±0.202ab
## 56 conventional N_gkg 10...20 0.046 0.001 2.756±0.095c
## 57 meadow N_gkg 10...20 0.046 0.001 3.790±0.186a
## 58 organic N_gkg 10...20 0.046 0.001 3.048±0.078b
## 59 conventional pH_H2O 10...20 0.259 0.000 5.994±0.047b
## 60 meadow pH_H2O 10...20 0.259 0.000 5.675±0.082a
## 61 organic pH_H2O 10...20 0.259 0.000 6.122±0.068b
## 62 conventional PH2O_mgkg 10...20 0.576 0.059 0.238±0.042a
## 63 meadow PH2O_mgkg 10...20 0.576 0.059 0.113±0.026a
## 64 organic PH2O_mgkg 10...20 0.576 0.059 0.151±0.037a
## 65 conventional Pinorg_mgkg 10...20 0.881 0.153 475.825±40.072a
## 66 meadow Pinorg_mgkg 10...20 0.881 0.153 373.108±43.638a
## 67 organic Pinorg_mgkg 10...20 0.881 0.153 488.146±47.331a
## 68 conventional Porg_mgkg 10...20 0.737 0.237 466.746±30.861a
## 69 meadow Porg_mgkg 10...20 0.737 0.237 521.230±32.480a
## 70 organic Porg_mgkg 10...20 0.737 0.237 454.918±17.320a
## 71 conventional TP_gkg 10...20 0.722 0.593 0.943±0.042a
## 72 meadow TP_gkg 10...20 0.722 0.593 0.894±0.030a
## 73 organic TP_gkg 10...20 0.722 0.593 0.943±0.038a
## 74 conventional Alox_mmolkg 20...30 0.093 0.000 62.675±3.216b
## 75 meadow Alox_mmolkg 20...30 0.093 0.000 107.130±9.584a
## 76 organic Alox_mmolkg 20...30 0.093 0.000 65.338±3.654b
## 77 conventional C_g_per_kg 20...30 0.120 0.003 13.505±1.994b
## 78 meadow C_g_per_kg 20...30 0.120 0.003 31.576±3.912a
## 79 organic C_g_per_kg 20...30 0.120 0.003 21.203±4.044ab
## 80 conventional C_per_N 20...30 0.120 0.089 10.429±0.723a
## 81 meadow C_per_N 20...30 0.120 0.089 12.562±0.250a
## 82 organic C_per_N 20...30 0.120 0.089 10.868±0.891a
## 83 conventional DOC_mgkg 20...30 0.214 0.002 116.479±16.148b
## 84 meadow DOC_mgkg 20...30 0.214 0.002 303.054±41.855a
## 85 organic DOC_mgkg 20...30 0.214 0.002 186.690±36.291ab
## 86 conventional Feox_mmolkg 20...30 0.587 0.000 64.312±8.328b
## 87 meadow Feox_mmolkg 20...30 0.587 0.000 140.938±11.602a
## 88 organic Feox_mmolkg 20...30 0.587 0.000 80.066±10.415b
## 89 conventional log_root 20...30 0.160 0.001 -1.442±0.204b
## 90 meadow log_root 20...30 0.160 0.001 -0.252±0.078a
## 91 organic log_root 20...30 0.160 0.001 -0.954±0.230b
## 92 conventional N_gkg 20...30 0.101 0.012 1.281±0.147b
## 93 meadow N_gkg 20...30 0.101 0.012 2.492±0.284a
## 94 organic N_gkg 20...30 0.101 0.012 1.927±0.345ab
## 95 conventional pH_H2O 20...30 0.460 0.000 6.347±0.071b
## 96 meadow pH_H2O 20...30 0.460 0.000 5.825±0.082a
## 97 organic pH_H2O 20...30 0.460 0.000 6.463±0.140b
## 98 conventional PH2O_mgkg 20...30 0.035 0.082 0.062±0.005a
## 99 meadow PH2O_mgkg 20...30 0.035 0.082 0.097±0.015a
## 100 organic PH2O_mgkg 20...30 0.035 0.082 0.086±0.011a
## 101 conventional Pinorg_mgkg 20...30 0.761 0.289 494.796±51.363a
## 102 meadow Pinorg_mgkg 20...30 0.761 0.289 383.399±36.473a
## 103 organic Pinorg_mgkg 20...30 0.761 0.289 458.051±57.651a
## 104 conventional Porg_mgkg 20...30 0.043 0.087 203.710±26.330b
## 105 meadow Porg_mgkg 20...30 0.043 0.087 271.122±52.837ab
## 106 organic Porg_mgkg 20...30 0.043 0.087 317.298±32.217a
## 107 conventional TP_gkg 20...30 0.890 0.284 0.698±0.053a
## 108 meadow TP_gkg 20...30 0.890 0.284 0.654±0.045a
## 109 organic TP_gkg 20...30 0.890 0.284 0.775±0.056a
## 110 conventional Alox_mmolkg 30...40 0.000 0.002 62.788±0.801b
## 111 meadow Alox_mmolkg 30...40 0.000 0.002 77.735±3.571a
## 112 organic Alox_mmolkg 30...40 0.000 0.002 62.620±1.690b
## 113 conventional C_g_per_kg 30...40 0.039 0.003 4.124±0.188b
## 114 meadow C_g_per_kg 30...40 0.039 0.003 9.227±1.985a
## 115 organic C_g_per_kg 30...40 0.039 0.003 4.764±0.328b
## 116 conventional C_per_N 30...40 0.025 0.007 6.165±0.471b
## 117 meadow C_per_N 30...40 0.025 0.007 11.202±2.058a
## 118 organic C_per_N 30...40 0.025 0.007 6.469±0.334b
## 119 conventional DOC_mgkg 30...40 0.061 0.003 64.818±3.470b
## 120 meadow DOC_mgkg 30...40 0.061 0.003 117.462±15.678a
## 121 organic DOC_mgkg 30...40 0.061 0.003 80.141±6.822b
## 122 conventional Feox_mmolkg 30...40 0.025 0.000 27.530±1.893b
## 123 meadow Feox_mmolkg 30...40 0.025 0.000 119.286±17.522a
## 124 organic Feox_mmolkg 30...40 0.025 0.000 31.413±2.423b
## 125 conventional log_root 30...40 0.499 0.000 -1.844±0.121c
## 126 meadow log_root 30...40 0.499 0.000 -0.753±0.087a
## 127 organic log_root 30...40 0.499 0.000 -1.494±0.076b
## 128 conventional N_gkg 30...40 0.054 0.171 0.682±0.023a
## 129 meadow N_gkg 30...40 0.054 0.171 0.851±0.102a
## 130 organic N_gkg 30...40 0.054 0.171 0.739±0.040a
## 131 conventional pH_H2O 30...40 0.324 0.001 6.814±0.085b
## 132 meadow pH_H2O 30...40 0.324 0.001 6.280±0.151a
## 133 organic pH_H2O 30...40 0.324 0.001 6.992±0.110b
## 134 conventional PH2O_mgkg 30...40 0.885 0.738 0.088±0.012a
## 135 meadow PH2O_mgkg 30...40 0.885 0.738 0.098±0.024a
## 136 organic PH2O_mgkg 30...40 0.885 0.738 0.079±0.012a
## 137 conventional Pinorg_mgkg 30...40 0.075 0.339 397.576±21.274a
## 138 meadow Pinorg_mgkg 30...40 0.075 0.339 469.409±49.171a
## 139 organic Pinorg_mgkg 30...40 0.075 0.339 433.861±27.993a
## 140 conventional Porg_mgkg 30...40 0.087 0.027 167.123±10.396b
## 141 meadow Porg_mgkg 30...40 0.087 0.027 112.548±19.473a
## 142 organic Porg_mgkg 30...40 0.087 0.027 161.557±11.063ab
## 143 conventional TP_gkg 30...40 0.080 0.817 0.565±0.017a
## 144 meadow TP_gkg 30...40 0.080 0.817 0.570±0.057a
## 145 organic TP_gkg 30...40 0.080 0.817 0.595±0.024a
## 146 conventional Alox_mmolkg 40... 0.582 0.003 51.008±1.032b
## 147 meadow Alox_mmolkg 40... 0.582 0.003 57.120±1.483a
## 148 organic Alox_mmolkg 40... 0.582 0.003 51.077±1.273b
## 149 conventional C_g_per_kg 40... 0.353 0.062 2.677±0.113a
## 150 meadow C_g_per_kg 40... 0.353 0.062 3.264±0.303a
## 151 organic C_g_per_kg 40... 0.353 0.062 2.679±0.089a
## 152 conventional C_per_N 40... 0.208 0.013 4.826±0.175b
## 153 meadow C_per_N 40... 0.208 0.013 5.883±0.387a
## 154 organic C_per_N 40... 0.208 0.013 4.939±0.130b
## 155 conventional DOC_mgkg 40... 0.427 0.003 41.959±2.555b
## 156 meadow DOC_mgkg 40... 0.427 0.003 61.773±4.866a
## 157 organic DOC_mgkg 40... 0.427 0.003 45.680±4.074b
## 158 conventional Feox_mmolkg 40... 0.000 0.000 25.711±0.788b
## 159 meadow Feox_mmolkg 40... 0.000 0.000 67.088±12.098a
## 160 organic Feox_mmolkg 40... 0.000 0.000 24.727±0.764b
## 161 conventional log_root 40... 0.393 0.000 -2.004±0.098b
## 162 meadow log_root 40... 0.393 0.000 -1.236±0.071a
## 163 organic log_root 40... 0.393 0.000 -1.555±0.126a
## 164 conventional N_gkg 40... 0.896 0.925 0.564±0.041a
## 165 meadow N_gkg 40... 0.896 0.925 0.556±0.032a
## 166 organic N_gkg 40... 0.896 0.925 0.545±0.023a
## 167 conventional pH_H2O 40... 0.446 0.003 7.244±0.082b
## 168 meadow pH_H2O 40... 0.446 0.003 6.876±0.123a
## 169 organic pH_H2O 40... 0.446 0.003 7.372±0.058b
## 170 conventional PH2O_mgkg 40... 0.837 0.219 0.136±0.028a
## 171 meadow PH2O_mgkg 40... 0.837 0.219 0.098±0.018a
## 172 organic PH2O_mgkg 40... 0.837 0.219 0.167±0.032a
## 173 conventional Pinorg_mgkg 40... 0.951 0.625 555.814±19.855a
## 174 meadow Pinorg_mgkg 40... 0.951 0.625 551.282±20.510a
## 175 organic Pinorg_mgkg 40... 0.951 0.625 579.265±23.771a
## 176 conventional Porg_mgkg 40... 0.845 0.376 128.341±13.832a
## 177 meadow Porg_mgkg 40... 0.845 0.376 99.637±13.041a
## 178 organic Porg_mgkg 40... 0.845 0.376 111.474±16.322a
## 179 conventional TP_gkg 40... 0.140 0.329 0.684±0.022a
## 180 meadow TP_gkg 40... 0.140 0.329 0.651±0.014a
## 181 organic TP_gkg 40... 0.140 0.329 0.691±0.020a
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load("ps_FINAL")#ps
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
library("cowplot")
Pinorg_mgkg <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(Pinorg_mgkg, na.rm = TRUE)/1000, se = (sd(Pinorg_mgkg, na.rm = TRUE)/sqrt(length((Pinorg_mgkg))))/1000) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="P-inorg (g/kg)") +
labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
Pinorg_mgkg
PH2O_mgkg <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(PH2O_mgkg, na.rm = TRUE), se = (sd(PH2O_mgkg, na.rm = TRUE)/sqrt(length((PH2O_mgkg))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="P-H2O (mg/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
PH2O_mgkg
Alox_mmolkg <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(Alox_mmolkg, na.rm = TRUE), se = (sd(Alox_mmolkg, na.rm = TRUE)/sqrt(length((Alox_mmolkg))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Al-ox (mmol/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
Alox_mmolkg
Feox_mmolkg <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(Feox_mmolkg, na.rm = TRUE), se = (sd(Feox_mmolkg, na.rm = TRUE)/sqrt(length((Feox_mmolkg))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Fe-ox (mmol/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
Feox_mmolkg
pH_H2O <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(pH_H2O, na.rm = TRUE), se = (sd(pH_H2O, na.rm = TRUE)/sqrt(length((pH_H2O))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="pH") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
pH_H2O
C_g_per_kg <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(C_g_per_kg, na.rm = TRUE), se = (sd(C_g_per_kg, na.rm = TRUE)/sqrt(length((C_g_per_kg))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="C (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
C_g_per_kg
N_gkg <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(N_gkg, na.rm = TRUE), se = (sd(N_gkg, na.rm = TRUE)/sqrt(length((N_gkg))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="N (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
N_gkg
TP_gkg <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(TP_gkg, na.rm = TRUE), se = (sd(TP_gkg, na.rm = TRUE)/sqrt(length((TP_gkg))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="P-tot (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
TP_gkg
Porg_mgkg <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(Porg_mgkg, na.rm = TRUE)/1000, se = (sd(Porg_mgkg, na.rm = TRUE)/sqrt(length((Porg_mgkg))))/1000) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="P-org (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
Porg_mgkg
DOC_mgkg <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(DOC_mgkg, na.rm = TRUE)/1000, se = (sd(DOC_mgkg, na.rm = TRUE)/sqrt(length((DOC_mgkg))))/1000) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="DOC (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
DOC_mgkg
# C_per_N
C_per_N <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(C_per_N, na.rm = TRUE), se = (sd(C_per_N, na.rm = TRUE)/sqrt(length((C_per_N))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="C/N") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
C_per_N
log_root <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(log_root, na.rm = TRUE), se = (sd(log_root, na.rm = TRUE)/sqrt(length((log_root))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="log10 root biomass (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
log_root
nc <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(C_per_N, na.rm = TRUE), se = (sd(C_per_N, na.rm = TRUE)/sqrt(length((C_per_N))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="C/N") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
nc
p <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(TP_gkg, na.rm = TRUE), se = (sd(TP_gkg, na.rm = TRUE)/sqrt(length((TP_gkg))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=2, position=position_dodge(1.5)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=14),
legend.text = element_text(size=12),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="P (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
p
library("ggpubr")
figure <- ggarrange(C_g_per_kg, N_gkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), C_per_N + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), DOC_mgkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), TP_gkg, Porg_mgkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), Pinorg_mgkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), PH2O_mgkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), pH_H2O, Feox_mmolkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), Alox_mmolkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), log_root + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"),
labels = NULL,
ncol = 4, nrow = 3, common.legend = TRUE, legend="right")
figure